1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
15
16 #include <assert.h>
17
18 /* Data structure used internally */
19 struct compiling {
20 char *c_encoding; /* source encoding */
21 int c_future_unicode; /* __future__ unicode literals flag */
22 PyArena *c_arena; /* arena for allocating memeory */
23 const char *c_filename; /* filename */
24 };
25
26 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
27 static expr_ty ast_for_expr(struct compiling *, const node *);
28 static stmt_ty ast_for_stmt(struct compiling *, const node *);
29 static asdl_seq *ast_for_suite(struct compiling *, const node *);
30 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
31 expr_context_ty);
32 static expr_ty ast_for_testlist(struct compiling *, const node *);
33 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
34 static expr_ty ast_for_testlist_comp(struct compiling *, const node *);
35
36 /* Note different signature for ast_for_call */
37 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
38
39 static PyObject *parsenumber(struct compiling *, const char *);
40 static PyObject *parsestr(struct compiling *, const node *n, const char *);
41 static PyObject *parsestrplus(struct compiling *, const node *n);
42
43 #ifndef LINENO
44 #define LINENO(n) ((n)->n_lineno)
45 #endif
46
47 #define COMP_GENEXP 0
48 #define COMP_SETCOMP 1
49
50 static identifier
new_identifier(const char * n,PyArena * arena)51 new_identifier(const char* n, PyArena *arena) {
52 PyObject* id = PyString_InternFromString(n);
53 if (id != NULL)
54 PyArena_AddPyObject(arena, id);
55 return id;
56 }
57
58 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
59
60 /* This routine provides an invalid object for the syntax error.
61 The outermost routine must unpack this error and create the
62 proper object. We do this so that we don't have to pass
63 the filename to everything function.
64
65 XXX Maybe we should just pass the filename...
66 */
67
68 static int
ast_error(const node * n,const char * errstr)69 ast_error(const node *n, const char *errstr)
70 {
71 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
72 if (!u)
73 return 0;
74 PyErr_SetObject(PyExc_SyntaxError, u);
75 Py_DECREF(u);
76 return 0;
77 }
78
79 static void
ast_error_finish(const char * filename)80 ast_error_finish(const char *filename)
81 {
82 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
83 long lineno;
84
85 assert(PyErr_Occurred());
86 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
87 return;
88
89 PyErr_Fetch(&type, &value, &tback);
90 errstr = PyTuple_GetItem(value, 0);
91 if (!errstr)
92 return;
93 Py_INCREF(errstr);
94 lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
95 if (lineno == -1) {
96 Py_DECREF(errstr);
97 return;
98 }
99 Py_DECREF(value);
100
101 loc = PyErr_ProgramText(filename, lineno);
102 if (!loc) {
103 Py_INCREF(Py_None);
104 loc = Py_None;
105 }
106 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
107 Py_DECREF(loc);
108 if (!tmp) {
109 Py_DECREF(errstr);
110 return;
111 }
112 value = PyTuple_Pack(2, errstr, tmp);
113 Py_DECREF(errstr);
114 Py_DECREF(tmp);
115 if (!value)
116 return;
117 PyErr_Restore(type, value, tback);
118 }
119
120 static int
ast_warn(struct compiling * c,const node * n,char * msg)121 ast_warn(struct compiling *c, const node *n, char *msg)
122 {
123 if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
124 NULL, NULL) < 0) {
125 /* if -Werr, change it to a SyntaxError */
126 if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
127 ast_error(n, msg);
128 return 0;
129 }
130 return 1;
131 }
132
133 static int
forbidden_check(struct compiling * c,const node * n,const char * x)134 forbidden_check(struct compiling *c, const node *n, const char *x)
135 {
136 if (!strcmp(x, "None"))
137 return ast_error(n, "cannot assign to None");
138 if (!strcmp(x, "__debug__"))
139 return ast_error(n, "cannot assign to __debug__");
140 if (Py_Py3kWarningFlag) {
141 if (!(strcmp(x, "True") && strcmp(x, "False")) &&
142 !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
143 return 0;
144 if (!strcmp(x, "nonlocal") &&
145 !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
146 return 0;
147 }
148 return 1;
149 }
150
151 /* num_stmts() returns number of contained statements.
152
153 Use this routine to determine how big a sequence is needed for
154 the statements in a parse tree. Its raison d'etre is this bit of
155 grammar:
156
157 stmt: simple_stmt | compound_stmt
158 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
159
160 A simple_stmt can contain multiple small_stmt elements joined
161 by semicolons. If the arg is a simple_stmt, the number of
162 small_stmt elements is returned.
163 */
164
165 static int
num_stmts(const node * n)166 num_stmts(const node *n)
167 {
168 int i, l;
169 node *ch;
170
171 switch (TYPE(n)) {
172 case single_input:
173 if (TYPE(CHILD(n, 0)) == NEWLINE)
174 return 0;
175 else
176 return num_stmts(CHILD(n, 0));
177 case file_input:
178 l = 0;
179 for (i = 0; i < NCH(n); i++) {
180 ch = CHILD(n, i);
181 if (TYPE(ch) == stmt)
182 l += num_stmts(ch);
183 }
184 return l;
185 case stmt:
186 return num_stmts(CHILD(n, 0));
187 case compound_stmt:
188 return 1;
189 case simple_stmt:
190 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
191 case suite:
192 if (NCH(n) == 1)
193 return num_stmts(CHILD(n, 0));
194 else {
195 l = 0;
196 for (i = 2; i < (NCH(n) - 1); i++)
197 l += num_stmts(CHILD(n, i));
198 return l;
199 }
200 default: {
201 char buf[128];
202
203 sprintf(buf, "Non-statement found: %d %d",
204 TYPE(n), NCH(n));
205 Py_FatalError(buf);
206 }
207 }
208 assert(0);
209 return 0;
210 }
211
212 /* Transform the CST rooted at node * to the appropriate AST
213 */
214
215 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)216 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
217 PyArena *arena)
218 {
219 int i, j, k, num;
220 asdl_seq *stmts = NULL;
221 stmt_ty s;
222 node *ch;
223 struct compiling c;
224
225 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
226 c.c_encoding = "utf-8";
227 if (TYPE(n) == encoding_decl) {
228 ast_error(n, "encoding declaration in Unicode string");
229 goto error;
230 }
231 } else if (TYPE(n) == encoding_decl) {
232 c.c_encoding = STR(n);
233 n = CHILD(n, 0);
234 } else {
235 c.c_encoding = NULL;
236 }
237 c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
238 c.c_arena = arena;
239 c.c_filename = filename;
240
241 k = 0;
242 switch (TYPE(n)) {
243 case file_input:
244 stmts = asdl_seq_new(num_stmts(n), arena);
245 if (!stmts)
246 return NULL;
247 for (i = 0; i < NCH(n) - 1; i++) {
248 ch = CHILD(n, i);
249 if (TYPE(ch) == NEWLINE)
250 continue;
251 REQ(ch, stmt);
252 num = num_stmts(ch);
253 if (num == 1) {
254 s = ast_for_stmt(&c, ch);
255 if (!s)
256 goto error;
257 asdl_seq_SET(stmts, k++, s);
258 }
259 else {
260 ch = CHILD(ch, 0);
261 REQ(ch, simple_stmt);
262 for (j = 0; j < num; j++) {
263 s = ast_for_stmt(&c, CHILD(ch, j * 2));
264 if (!s)
265 goto error;
266 asdl_seq_SET(stmts, k++, s);
267 }
268 }
269 }
270 return Module(stmts, arena);
271 case eval_input: {
272 expr_ty testlist_ast;
273
274 /* XXX Why not comp_for here? */
275 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
276 if (!testlist_ast)
277 goto error;
278 return Expression(testlist_ast, arena);
279 }
280 case single_input:
281 if (TYPE(CHILD(n, 0)) == NEWLINE) {
282 stmts = asdl_seq_new(1, arena);
283 if (!stmts)
284 goto error;
285 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
286 arena));
287 if (!asdl_seq_GET(stmts, 0))
288 goto error;
289 return Interactive(stmts, arena);
290 }
291 else {
292 n = CHILD(n, 0);
293 num = num_stmts(n);
294 stmts = asdl_seq_new(num, arena);
295 if (!stmts)
296 goto error;
297 if (num == 1) {
298 s = ast_for_stmt(&c, n);
299 if (!s)
300 goto error;
301 asdl_seq_SET(stmts, 0, s);
302 }
303 else {
304 /* Only a simple_stmt can contain multiple statements. */
305 REQ(n, simple_stmt);
306 for (i = 0; i < NCH(n); i += 2) {
307 if (TYPE(CHILD(n, i)) == NEWLINE)
308 break;
309 s = ast_for_stmt(&c, CHILD(n, i));
310 if (!s)
311 goto error;
312 asdl_seq_SET(stmts, i / 2, s);
313 }
314 }
315
316 return Interactive(stmts, arena);
317 }
318 default:
319 PyErr_Format(PyExc_SystemError,
320 "invalid node %d for PyAST_FromNode", TYPE(n));
321 goto error;
322 }
323 error:
324 ast_error_finish(filename);
325 return NULL;
326 }
327
328 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
329 */
330
331 static operator_ty
get_operator(const node * n)332 get_operator(const node *n)
333 {
334 switch (TYPE(n)) {
335 case VBAR:
336 return BitOr;
337 case CIRCUMFLEX:
338 return BitXor;
339 case AMPER:
340 return BitAnd;
341 case LEFTSHIFT:
342 return LShift;
343 case RIGHTSHIFT:
344 return RShift;
345 case PLUS:
346 return Add;
347 case MINUS:
348 return Sub;
349 case STAR:
350 return Mult;
351 case SLASH:
352 return Div;
353 case DOUBLESLASH:
354 return FloorDiv;
355 case PERCENT:
356 return Mod;
357 default:
358 return (operator_ty)0;
359 }
360 }
361
362 /* Set the context ctx for expr_ty e, recursively traversing e.
363
364 Only sets context for expr kinds that "can appear in assignment context"
365 (according to ../Parser/Python.asdl). For other expr kinds, it sets
366 an appropriate syntax error and returns false.
367 */
368
369 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)370 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
371 {
372 asdl_seq *s = NULL;
373 /* If a particular expression type can't be used for assign / delete,
374 set expr_name to its name and an error message will be generated.
375 */
376 const char* expr_name = NULL;
377
378 /* The ast defines augmented store and load contexts, but the
379 implementation here doesn't actually use them. The code may be
380 a little more complex than necessary as a result. It also means
381 that expressions in an augmented assignment have a Store context.
382 Consider restructuring so that augmented assignment uses
383 set_context(), too.
384 */
385 assert(ctx != AugStore && ctx != AugLoad);
386
387 switch (e->kind) {
388 case Attribute_kind:
389 if (ctx == Store && !forbidden_check(c, n,
390 PyBytes_AS_STRING(e->v.Attribute.attr)))
391 return 0;
392 e->v.Attribute.ctx = ctx;
393 break;
394 case Subscript_kind:
395 e->v.Subscript.ctx = ctx;
396 break;
397 case Name_kind:
398 if (ctx == Store && !forbidden_check(c, n,
399 PyBytes_AS_STRING(e->v.Name.id)))
400 return 0;
401 e->v.Name.ctx = ctx;
402 break;
403 case List_kind:
404 e->v.List.ctx = ctx;
405 s = e->v.List.elts;
406 break;
407 case Tuple_kind:
408 if (asdl_seq_LEN(e->v.Tuple.elts)) {
409 e->v.Tuple.ctx = ctx;
410 s = e->v.Tuple.elts;
411 }
412 else {
413 expr_name = "()";
414 }
415 break;
416 case Lambda_kind:
417 expr_name = "lambda";
418 break;
419 case Call_kind:
420 expr_name = "function call";
421 break;
422 case BoolOp_kind:
423 case BinOp_kind:
424 case UnaryOp_kind:
425 expr_name = "operator";
426 break;
427 case GeneratorExp_kind:
428 expr_name = "generator expression";
429 break;
430 case Yield_kind:
431 expr_name = "yield expression";
432 break;
433 case ListComp_kind:
434 expr_name = "list comprehension";
435 break;
436 case SetComp_kind:
437 expr_name = "set comprehension";
438 break;
439 case DictComp_kind:
440 expr_name = "dict comprehension";
441 break;
442 case Dict_kind:
443 case Set_kind:
444 case Num_kind:
445 case Str_kind:
446 expr_name = "literal";
447 break;
448 case Compare_kind:
449 expr_name = "comparison";
450 break;
451 case Repr_kind:
452 expr_name = "repr";
453 break;
454 case IfExp_kind:
455 expr_name = "conditional expression";
456 break;
457 default:
458 PyErr_Format(PyExc_SystemError,
459 "unexpected expression in assignment %d (line %d)",
460 e->kind, e->lineno);
461 return 0;
462 }
463 /* Check for error string set by switch */
464 if (expr_name) {
465 char buf[300];
466 PyOS_snprintf(buf, sizeof(buf),
467 "can't %s %s",
468 ctx == Store ? "assign to" : "delete",
469 expr_name);
470 return ast_error(n, buf);
471 }
472
473 /* If the LHS is a list or tuple, we need to set the assignment
474 context for all the contained elements.
475 */
476 if (s) {
477 int i;
478
479 for (i = 0; i < asdl_seq_LEN(s); i++) {
480 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
481 return 0;
482 }
483 }
484 return 1;
485 }
486
487 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)488 ast_for_augassign(struct compiling *c, const node *n)
489 {
490 REQ(n, augassign);
491 n = CHILD(n, 0);
492 switch (STR(n)[0]) {
493 case '+':
494 return Add;
495 case '-':
496 return Sub;
497 case '/':
498 if (STR(n)[1] == '/')
499 return FloorDiv;
500 else
501 return Div;
502 case '%':
503 return Mod;
504 case '<':
505 return LShift;
506 case '>':
507 return RShift;
508 case '&':
509 return BitAnd;
510 case '^':
511 return BitXor;
512 case '|':
513 return BitOr;
514 case '*':
515 if (STR(n)[1] == '*')
516 return Pow;
517 else
518 return Mult;
519 default:
520 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
521 return (operator_ty)0;
522 }
523 }
524
525 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)526 ast_for_comp_op(struct compiling *c, const node *n)
527 {
528 /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
529 |'is' 'not'
530 */
531 REQ(n, comp_op);
532 if (NCH(n) == 1) {
533 n = CHILD(n, 0);
534 switch (TYPE(n)) {
535 case LESS:
536 return Lt;
537 case GREATER:
538 return Gt;
539 case EQEQUAL: /* == */
540 return Eq;
541 case LESSEQUAL:
542 return LtE;
543 case GREATEREQUAL:
544 return GtE;
545 case NOTEQUAL:
546 return NotEq;
547 case NAME:
548 if (strcmp(STR(n), "in") == 0)
549 return In;
550 if (strcmp(STR(n), "is") == 0)
551 return Is;
552 default:
553 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
554 STR(n));
555 return (cmpop_ty)0;
556 }
557 }
558 else if (NCH(n) == 2) {
559 /* handle "not in" and "is not" */
560 switch (TYPE(CHILD(n, 0))) {
561 case NAME:
562 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
563 return NotIn;
564 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
565 return IsNot;
566 default:
567 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
568 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
569 return (cmpop_ty)0;
570 }
571 }
572 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
573 NCH(n));
574 return (cmpop_ty)0;
575 }
576
577 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)578 seq_for_testlist(struct compiling *c, const node *n)
579 {
580 /* testlist: test (',' test)* [','] */
581 asdl_seq *seq;
582 expr_ty expression;
583 int i;
584 assert(TYPE(n) == testlist ||
585 TYPE(n) == listmaker ||
586 TYPE(n) == testlist_comp ||
587 TYPE(n) == testlist_safe ||
588 TYPE(n) == testlist1);
589
590 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
591 if (!seq)
592 return NULL;
593
594 for (i = 0; i < NCH(n); i += 2) {
595 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
596
597 expression = ast_for_expr(c, CHILD(n, i));
598 if (!expression)
599 return NULL;
600
601 assert(i / 2 < seq->size);
602 asdl_seq_SET(seq, i / 2, expression);
603 }
604 return seq;
605 }
606
607 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)608 compiler_complex_args(struct compiling *c, const node *n)
609 {
610 int i, len = (NCH(n) + 1) / 2;
611 expr_ty result;
612 asdl_seq *args = asdl_seq_new(len, c->c_arena);
613 if (!args)
614 return NULL;
615
616 /* fpdef: NAME | '(' fplist ')'
617 fplist: fpdef (',' fpdef)* [',']
618 */
619 REQ(n, fplist);
620 for (i = 0; i < len; i++) {
621 PyObject *arg_id;
622 const node *fpdef_node = CHILD(n, 2*i);
623 const node *child;
624 expr_ty arg;
625 set_name:
626 /* fpdef_node is either a NAME or an fplist */
627 child = CHILD(fpdef_node, 0);
628 if (TYPE(child) == NAME) {
629 if (!forbidden_check(c, n, STR(child)))
630 return NULL;
631 arg_id = NEW_IDENTIFIER(child);
632 if (!arg_id)
633 return NULL;
634 arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
635 c->c_arena);
636 }
637 else {
638 assert(TYPE(fpdef_node) == fpdef);
639 /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
640 child = CHILD(fpdef_node, 1);
641 assert(TYPE(child) == fplist);
642 /* NCH == 1 means we have (x), we need to elide the extra parens */
643 if (NCH(child) == 1) {
644 fpdef_node = CHILD(child, 0);
645 assert(TYPE(fpdef_node) == fpdef);
646 goto set_name;
647 }
648 arg = compiler_complex_args(c, child);
649 }
650 asdl_seq_SET(args, i, arg);
651 }
652
653 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
654 if (!set_context(c, result, Store, n))
655 return NULL;
656 return result;
657 }
658
659
660 /* Create AST for argument list. */
661
662 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)663 ast_for_arguments(struct compiling *c, const node *n)
664 {
665 /* parameters: '(' [varargslist] ')'
666 varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
667 | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
668 */
669 int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
670 asdl_seq *args, *defaults;
671 identifier vararg = NULL, kwarg = NULL;
672 node *ch;
673
674 if (TYPE(n) == parameters) {
675 if (NCH(n) == 2) /* () as argument list */
676 return arguments(NULL, NULL, NULL, NULL, c->c_arena);
677 n = CHILD(n, 1);
678 }
679 REQ(n, varargslist);
680
681 /* first count the number of normal args & defaults */
682 for (i = 0; i < NCH(n); i++) {
683 ch = CHILD(n, i);
684 if (TYPE(ch) == fpdef)
685 n_args++;
686 if (TYPE(ch) == EQUAL)
687 n_defaults++;
688 }
689 args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
690 if (!args && n_args)
691 return NULL;
692 defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
693 if (!defaults && n_defaults)
694 return NULL;
695
696 /* fpdef: NAME | '(' fplist ')'
697 fplist: fpdef (',' fpdef)* [',']
698 */
699 i = 0;
700 j = 0; /* index for defaults */
701 k = 0; /* index for args */
702 while (i < NCH(n)) {
703 ch = CHILD(n, i);
704 switch (TYPE(ch)) {
705 case fpdef: {
706 int complex_args = 0, parenthesized = 0;
707 handle_fpdef:
708 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
709 anything other than EQUAL or a comma? */
710 /* XXX Should NCH(n) check be made a separate check? */
711 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
712 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
713 if (!expression)
714 return NULL;
715 assert(defaults != NULL);
716 asdl_seq_SET(defaults, j++, expression);
717 i += 2;
718 found_default = 1;
719 }
720 else if (found_default) {
721 /* def f((x)=4): pass should raise an error.
722 def f((x, (y))): pass will just incur the tuple unpacking warning. */
723 if (parenthesized && !complex_args) {
724 ast_error(n, "parenthesized arg with default");
725 return NULL;
726 }
727 ast_error(n,
728 "non-default argument follows default argument");
729 return NULL;
730 }
731 if (NCH(ch) == 3) {
732 ch = CHILD(ch, 1);
733 /* def foo((x)): is not complex, special case. */
734 if (NCH(ch) != 1) {
735 /* We have complex arguments, setup for unpacking. */
736 if (Py_Py3kWarningFlag && !ast_warn(c, ch,
737 "tuple parameter unpacking has been removed in 3.x"))
738 return NULL;
739 complex_args = 1;
740 asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
741 if (!asdl_seq_GET(args, k-1))
742 return NULL;
743 } else {
744 /* def foo((x)): setup for checking NAME below. */
745 /* Loop because there can be many parens and tuple
746 unpacking mixed in. */
747 parenthesized = 1;
748 ch = CHILD(ch, 0);
749 assert(TYPE(ch) == fpdef);
750 goto handle_fpdef;
751 }
752 }
753 if (TYPE(CHILD(ch, 0)) == NAME) {
754 PyObject *id;
755 expr_ty name;
756 if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
757 return NULL;
758 id = NEW_IDENTIFIER(CHILD(ch, 0));
759 if (!id)
760 return NULL;
761 name = Name(id, Param, LINENO(ch), ch->n_col_offset,
762 c->c_arena);
763 if (!name)
764 return NULL;
765 asdl_seq_SET(args, k++, name);
766
767 }
768 i += 2; /* the name and the comma */
769 if (parenthesized && Py_Py3kWarningFlag &&
770 !ast_warn(c, ch, "parenthesized argument names "
771 "are invalid in 3.x"))
772 return NULL;
773
774 break;
775 }
776 case STAR:
777 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
778 return NULL;
779 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
780 if (!vararg)
781 return NULL;
782 i += 3;
783 break;
784 case DOUBLESTAR:
785 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
786 return NULL;
787 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
788 if (!kwarg)
789 return NULL;
790 i += 3;
791 break;
792 default:
793 PyErr_Format(PyExc_SystemError,
794 "unexpected node in varargslist: %d @ %d",
795 TYPE(ch), i);
796 return NULL;
797 }
798 }
799
800 return arguments(args, vararg, kwarg, defaults, c->c_arena);
801 }
802
803 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)804 ast_for_dotted_name(struct compiling *c, const node *n)
805 {
806 expr_ty e;
807 identifier id;
808 int lineno, col_offset;
809 int i;
810
811 REQ(n, dotted_name);
812
813 lineno = LINENO(n);
814 col_offset = n->n_col_offset;
815
816 id = NEW_IDENTIFIER(CHILD(n, 0));
817 if (!id)
818 return NULL;
819 e = Name(id, Load, lineno, col_offset, c->c_arena);
820 if (!e)
821 return NULL;
822
823 for (i = 2; i < NCH(n); i+=2) {
824 id = NEW_IDENTIFIER(CHILD(n, i));
825 if (!id)
826 return NULL;
827 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
828 if (!e)
829 return NULL;
830 }
831
832 return e;
833 }
834
835 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)836 ast_for_decorator(struct compiling *c, const node *n)
837 {
838 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
839 expr_ty d = NULL;
840 expr_ty name_expr;
841
842 REQ(n, decorator);
843 REQ(CHILD(n, 0), AT);
844 REQ(RCHILD(n, -1), NEWLINE);
845
846 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
847 if (!name_expr)
848 return NULL;
849
850 if (NCH(n) == 3) { /* No arguments */
851 d = name_expr;
852 name_expr = NULL;
853 }
854 else if (NCH(n) == 5) { /* Call with no arguments */
855 d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
856 n->n_col_offset, c->c_arena);
857 if (!d)
858 return NULL;
859 name_expr = NULL;
860 }
861 else {
862 d = ast_for_call(c, CHILD(n, 3), name_expr);
863 if (!d)
864 return NULL;
865 name_expr = NULL;
866 }
867
868 return d;
869 }
870
871 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)872 ast_for_decorators(struct compiling *c, const node *n)
873 {
874 asdl_seq* decorator_seq;
875 expr_ty d;
876 int i;
877
878 REQ(n, decorators);
879 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
880 if (!decorator_seq)
881 return NULL;
882
883 for (i = 0; i < NCH(n); i++) {
884 d = ast_for_decorator(c, CHILD(n, i));
885 if (!d)
886 return NULL;
887 asdl_seq_SET(decorator_seq, i, d);
888 }
889 return decorator_seq;
890 }
891
892 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)893 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
894 {
895 /* funcdef: 'def' NAME parameters ':' suite */
896 identifier name;
897 arguments_ty args;
898 asdl_seq *body;
899 int name_i = 1;
900
901 REQ(n, funcdef);
902
903 name = NEW_IDENTIFIER(CHILD(n, name_i));
904 if (!name)
905 return NULL;
906 else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
907 return NULL;
908 args = ast_for_arguments(c, CHILD(n, name_i + 1));
909 if (!args)
910 return NULL;
911 body = ast_for_suite(c, CHILD(n, name_i + 3));
912 if (!body)
913 return NULL;
914
915 return FunctionDef(name, args, body, decorator_seq, LINENO(n),
916 n->n_col_offset, c->c_arena);
917 }
918
919 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)920 ast_for_decorated(struct compiling *c, const node *n)
921 {
922 /* decorated: decorators (classdef | funcdef) */
923 stmt_ty thing = NULL;
924 asdl_seq *decorator_seq = NULL;
925
926 REQ(n, decorated);
927
928 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
929 if (!decorator_seq)
930 return NULL;
931
932 assert(TYPE(CHILD(n, 1)) == funcdef ||
933 TYPE(CHILD(n, 1)) == classdef);
934
935 if (TYPE(CHILD(n, 1)) == funcdef) {
936 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
937 } else if (TYPE(CHILD(n, 1)) == classdef) {
938 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
939 }
940 /* we count the decorators in when talking about the class' or
941 function's line number */
942 if (thing) {
943 thing->lineno = LINENO(n);
944 thing->col_offset = n->n_col_offset;
945 }
946 return thing;
947 }
948
949 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)950 ast_for_lambdef(struct compiling *c, const node *n)
951 {
952 /* lambdef: 'lambda' [varargslist] ':' test */
953 arguments_ty args;
954 expr_ty expression;
955
956 if (NCH(n) == 3) {
957 args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
958 if (!args)
959 return NULL;
960 expression = ast_for_expr(c, CHILD(n, 2));
961 if (!expression)
962 return NULL;
963 }
964 else {
965 args = ast_for_arguments(c, CHILD(n, 1));
966 if (!args)
967 return NULL;
968 expression = ast_for_expr(c, CHILD(n, 3));
969 if (!expression)
970 return NULL;
971 }
972
973 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
974 }
975
976 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)977 ast_for_ifexpr(struct compiling *c, const node *n)
978 {
979 /* test: or_test 'if' or_test 'else' test */
980 expr_ty expression, body, orelse;
981
982 assert(NCH(n) == 5);
983 body = ast_for_expr(c, CHILD(n, 0));
984 if (!body)
985 return NULL;
986 expression = ast_for_expr(c, CHILD(n, 2));
987 if (!expression)
988 return NULL;
989 orelse = ast_for_expr(c, CHILD(n, 4));
990 if (!orelse)
991 return NULL;
992 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
993 c->c_arena);
994 }
995
996 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
997 so there is only a single version. Possibly for loops can also re-use
998 the code.
999 */
1000
1001 /* Count the number of 'for' loop in a list comprehension.
1002
1003 Helper for ast_for_listcomp().
1004 */
1005
1006 static int
count_list_fors(struct compiling * c,const node * n)1007 count_list_fors(struct compiling *c, const node *n)
1008 {
1009 int n_fors = 0;
1010 node *ch = CHILD(n, 1);
1011
1012 count_list_for:
1013 n_fors++;
1014 REQ(ch, list_for);
1015 if (NCH(ch) == 5)
1016 ch = CHILD(ch, 4);
1017 else
1018 return n_fors;
1019 count_list_iter:
1020 REQ(ch, list_iter);
1021 ch = CHILD(ch, 0);
1022 if (TYPE(ch) == list_for)
1023 goto count_list_for;
1024 else if (TYPE(ch) == list_if) {
1025 if (NCH(ch) == 3) {
1026 ch = CHILD(ch, 2);
1027 goto count_list_iter;
1028 }
1029 else
1030 return n_fors;
1031 }
1032
1033 /* Should never be reached */
1034 PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1035 return -1;
1036 }
1037
1038 /* Count the number of 'if' statements in a list comprehension.
1039
1040 Helper for ast_for_listcomp().
1041 */
1042
1043 static int
count_list_ifs(struct compiling * c,const node * n)1044 count_list_ifs(struct compiling *c, const node *n)
1045 {
1046 int n_ifs = 0;
1047
1048 count_list_iter:
1049 REQ(n, list_iter);
1050 if (TYPE(CHILD(n, 0)) == list_for)
1051 return n_ifs;
1052 n = CHILD(n, 0);
1053 REQ(n, list_if);
1054 n_ifs++;
1055 if (NCH(n) == 2)
1056 return n_ifs;
1057 n = CHILD(n, 2);
1058 goto count_list_iter;
1059 }
1060
1061 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1062 ast_for_listcomp(struct compiling *c, const node *n)
1063 {
1064 /* listmaker: test ( list_for | (',' test)* [','] )
1065 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1066 list_iter: list_for | list_if
1067 list_if: 'if' test [list_iter]
1068 testlist_safe: test [(',' test)+ [',']]
1069 */
1070 expr_ty elt, first;
1071 asdl_seq *listcomps;
1072 int i, n_fors;
1073 node *ch;
1074
1075 REQ(n, listmaker);
1076 assert(NCH(n) > 1);
1077
1078 elt = ast_for_expr(c, CHILD(n, 0));
1079 if (!elt)
1080 return NULL;
1081
1082 n_fors = count_list_fors(c, n);
1083 if (n_fors == -1)
1084 return NULL;
1085
1086 listcomps = asdl_seq_new(n_fors, c->c_arena);
1087 if (!listcomps)
1088 return NULL;
1089
1090 ch = CHILD(n, 1);
1091 for (i = 0; i < n_fors; i++) {
1092 comprehension_ty lc;
1093 asdl_seq *t;
1094 expr_ty expression;
1095 node *for_ch;
1096
1097 REQ(ch, list_for);
1098
1099 for_ch = CHILD(ch, 1);
1100 t = ast_for_exprlist(c, for_ch, Store);
1101 if (!t)
1102 return NULL;
1103 expression = ast_for_testlist(c, CHILD(ch, 3));
1104 if (!expression)
1105 return NULL;
1106
1107 /* Check the # of children rather than the length of t, since
1108 [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1109 */
1110 first = (expr_ty)asdl_seq_GET(t, 0);
1111 if (NCH(for_ch) == 1)
1112 lc = comprehension(first, expression, NULL, c->c_arena);
1113 else
1114 lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1115 c->c_arena),
1116 expression, NULL, c->c_arena);
1117 if (!lc)
1118 return NULL;
1119
1120 if (NCH(ch) == 5) {
1121 int j, n_ifs;
1122 asdl_seq *ifs;
1123 expr_ty list_for_expr;
1124
1125 ch = CHILD(ch, 4);
1126 n_ifs = count_list_ifs(c, ch);
1127 if (n_ifs == -1)
1128 return NULL;
1129
1130 ifs = asdl_seq_new(n_ifs, c->c_arena);
1131 if (!ifs)
1132 return NULL;
1133
1134 for (j = 0; j < n_ifs; j++) {
1135 REQ(ch, list_iter);
1136 ch = CHILD(ch, 0);
1137 REQ(ch, list_if);
1138
1139 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1140 if (!list_for_expr)
1141 return NULL;
1142
1143 asdl_seq_SET(ifs, j, list_for_expr);
1144 if (NCH(ch) == 3)
1145 ch = CHILD(ch, 2);
1146 }
1147 /* on exit, must guarantee that ch is a list_for */
1148 if (TYPE(ch) == list_iter)
1149 ch = CHILD(ch, 0);
1150 lc->ifs = ifs;
1151 }
1152 asdl_seq_SET(listcomps, i, lc);
1153 }
1154
1155 return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1156 }
1157
1158 /*
1159 Count the number of 'for' loops in a comprehension.
1160
1161 Helper for ast_for_comprehension().
1162 */
1163
1164 static int
count_comp_fors(struct compiling * c,const node * n)1165 count_comp_fors(struct compiling *c, const node *n)
1166 {
1167 int n_fors = 0;
1168
1169 count_comp_for:
1170 n_fors++;
1171 REQ(n, comp_for);
1172 if (NCH(n) == 5)
1173 n = CHILD(n, 4);
1174 else
1175 return n_fors;
1176 count_comp_iter:
1177 REQ(n, comp_iter);
1178 n = CHILD(n, 0);
1179 if (TYPE(n) == comp_for)
1180 goto count_comp_for;
1181 else if (TYPE(n) == comp_if) {
1182 if (NCH(n) == 3) {
1183 n = CHILD(n, 2);
1184 goto count_comp_iter;
1185 }
1186 else
1187 return n_fors;
1188 }
1189
1190 /* Should never be reached */
1191 PyErr_SetString(PyExc_SystemError,
1192 "logic error in count_comp_fors");
1193 return -1;
1194 }
1195
1196 /* Count the number of 'if' statements in a comprehension.
1197
1198 Helper for ast_for_comprehension().
1199 */
1200
1201 static int
count_comp_ifs(struct compiling * c,const node * n)1202 count_comp_ifs(struct compiling *c, const node *n)
1203 {
1204 int n_ifs = 0;
1205
1206 while (1) {
1207 REQ(n, comp_iter);
1208 if (TYPE(CHILD(n, 0)) == comp_for)
1209 return n_ifs;
1210 n = CHILD(n, 0);
1211 REQ(n, comp_if);
1212 n_ifs++;
1213 if (NCH(n) == 2)
1214 return n_ifs;
1215 n = CHILD(n, 2);
1216 }
1217 }
1218
1219 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1220 ast_for_comprehension(struct compiling *c, const node *n)
1221 {
1222 int i, n_fors;
1223 asdl_seq *comps;
1224
1225 n_fors = count_comp_fors(c, n);
1226 if (n_fors == -1)
1227 return NULL;
1228
1229 comps = asdl_seq_new(n_fors, c->c_arena);
1230 if (!comps)
1231 return NULL;
1232
1233 for (i = 0; i < n_fors; i++) {
1234 comprehension_ty comp;
1235 asdl_seq *t;
1236 expr_ty expression, first;
1237 node *for_ch;
1238
1239 REQ(n, comp_for);
1240
1241 for_ch = CHILD(n, 1);
1242 t = ast_for_exprlist(c, for_ch, Store);
1243 if (!t)
1244 return NULL;
1245 expression = ast_for_expr(c, CHILD(n, 3));
1246 if (!expression)
1247 return NULL;
1248
1249 /* Check the # of children rather than the length of t, since
1250 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1251 first = (expr_ty)asdl_seq_GET(t, 0);
1252 if (NCH(for_ch) == 1)
1253 comp = comprehension(first, expression, NULL, c->c_arena);
1254 else
1255 comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1256 c->c_arena),
1257 expression, NULL, c->c_arena);
1258 if (!comp)
1259 return NULL;
1260
1261 if (NCH(n) == 5) {
1262 int j, n_ifs;
1263 asdl_seq *ifs;
1264
1265 n = CHILD(n, 4);
1266 n_ifs = count_comp_ifs(c, n);
1267 if (n_ifs == -1)
1268 return NULL;
1269
1270 ifs = asdl_seq_new(n_ifs, c->c_arena);
1271 if (!ifs)
1272 return NULL;
1273
1274 for (j = 0; j < n_ifs; j++) {
1275 REQ(n, comp_iter);
1276 n = CHILD(n, 0);
1277 REQ(n, comp_if);
1278
1279 expression = ast_for_expr(c, CHILD(n, 1));
1280 if (!expression)
1281 return NULL;
1282 asdl_seq_SET(ifs, j, expression);
1283 if (NCH(n) == 3)
1284 n = CHILD(n, 2);
1285 }
1286 /* on exit, must guarantee that n is a comp_for */
1287 if (TYPE(n) == comp_iter)
1288 n = CHILD(n, 0);
1289 comp->ifs = ifs;
1290 }
1291 asdl_seq_SET(comps, i, comp);
1292 }
1293 return comps;
1294 }
1295
1296 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1297 ast_for_itercomp(struct compiling *c, const node *n, int type)
1298 {
1299 expr_ty elt;
1300 asdl_seq *comps;
1301
1302 assert(NCH(n) > 1);
1303
1304 elt = ast_for_expr(c, CHILD(n, 0));
1305 if (!elt)
1306 return NULL;
1307
1308 comps = ast_for_comprehension(c, CHILD(n, 1));
1309 if (!comps)
1310 return NULL;
1311
1312 if (type == COMP_GENEXP)
1313 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1314 else if (type == COMP_SETCOMP)
1315 return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1316 else
1317 /* Should never happen */
1318 return NULL;
1319 }
1320
1321 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1322 ast_for_dictcomp(struct compiling *c, const node *n)
1323 {
1324 expr_ty key, value;
1325 asdl_seq *comps;
1326
1327 assert(NCH(n) > 3);
1328 REQ(CHILD(n, 1), COLON);
1329
1330 key = ast_for_expr(c, CHILD(n, 0));
1331 if (!key)
1332 return NULL;
1333
1334 value = ast_for_expr(c, CHILD(n, 2));
1335 if (!value)
1336 return NULL;
1337
1338 comps = ast_for_comprehension(c, CHILD(n, 3));
1339 if (!comps)
1340 return NULL;
1341
1342 return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1343 }
1344
1345 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1346 ast_for_genexp(struct compiling *c, const node *n)
1347 {
1348 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1349 return ast_for_itercomp(c, n, COMP_GENEXP);
1350 }
1351
1352 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1353 ast_for_setcomp(struct compiling *c, const node *n)
1354 {
1355 assert(TYPE(n) == (dictorsetmaker));
1356 return ast_for_itercomp(c, n, COMP_SETCOMP);
1357 }
1358
1359 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1360 ast_for_atom(struct compiling *c, const node *n)
1361 {
1362 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1363 | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1364 */
1365 node *ch = CHILD(n, 0);
1366
1367 switch (TYPE(ch)) {
1368 case NAME: {
1369 /* All names start in Load context, but may later be
1370 changed. */
1371 PyObject *name = NEW_IDENTIFIER(ch);
1372 if (!name)
1373 return NULL;
1374 return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1375 }
1376 case STRING: {
1377 PyObject *str = parsestrplus(c, n);
1378 if (!str) {
1379 #ifdef Py_USING_UNICODE
1380 if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1381 PyObject *type, *value, *tback, *errstr;
1382 PyErr_Fetch(&type, &value, &tback);
1383 errstr = PyObject_Str(value);
1384 if (errstr) {
1385 char *s = "";
1386 char buf[128];
1387 s = PyString_AsString(errstr);
1388 PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1389 ast_error(n, buf);
1390 Py_DECREF(errstr);
1391 } else {
1392 ast_error(n, "(unicode error) unknown error");
1393 }
1394 Py_DECREF(type);
1395 Py_DECREF(value);
1396 Py_XDECREF(tback);
1397 }
1398 #endif
1399 return NULL;
1400 }
1401 PyArena_AddPyObject(c->c_arena, str);
1402 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1403 }
1404 case NUMBER: {
1405 PyObject *pynum = parsenumber(c, STR(ch));
1406 if (!pynum)
1407 return NULL;
1408
1409 PyArena_AddPyObject(c->c_arena, pynum);
1410 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1411 }
1412 case LPAR: /* some parenthesized expressions */
1413 ch = CHILD(n, 1);
1414
1415 if (TYPE(ch) == RPAR)
1416 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1417
1418 if (TYPE(ch) == yield_expr)
1419 return ast_for_expr(c, ch);
1420
1421 return ast_for_testlist_comp(c, ch);
1422 case LSQB: /* list (or list comprehension) */
1423 ch = CHILD(n, 1);
1424
1425 if (TYPE(ch) == RSQB)
1426 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1427
1428 REQ(ch, listmaker);
1429 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1430 asdl_seq *elts = seq_for_testlist(c, ch);
1431 if (!elts)
1432 return NULL;
1433
1434 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1435 }
1436 else
1437 return ast_for_listcomp(c, ch);
1438 case LBRACE: {
1439 /* dictorsetmaker:
1440 * (test ':' test (comp_for | (',' test ':' test)* [','])) |
1441 * (test (comp_for | (',' test)* [',']))
1442 */
1443 int i, size;
1444 asdl_seq *keys, *values;
1445
1446 ch = CHILD(n, 1);
1447 if (TYPE(ch) == RBRACE) {
1448 /* it's an empty dict */
1449 return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1450 } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1451 /* it's a simple set */
1452 asdl_seq *elts;
1453 size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1454 elts = asdl_seq_new(size, c->c_arena);
1455 if (!elts)
1456 return NULL;
1457 for (i = 0; i < NCH(ch); i += 2) {
1458 expr_ty expression;
1459 expression = ast_for_expr(c, CHILD(ch, i));
1460 if (!expression)
1461 return NULL;
1462 asdl_seq_SET(elts, i / 2, expression);
1463 }
1464 return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1465 } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1466 /* it's a set comprehension */
1467 return ast_for_setcomp(c, ch);
1468 } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1469 return ast_for_dictcomp(c, ch);
1470 } else {
1471 /* it's a dict */
1472 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1473 keys = asdl_seq_new(size, c->c_arena);
1474 if (!keys)
1475 return NULL;
1476
1477 values = asdl_seq_new(size, c->c_arena);
1478 if (!values)
1479 return NULL;
1480
1481 for (i = 0; i < NCH(ch); i += 4) {
1482 expr_ty expression;
1483
1484 expression = ast_for_expr(c, CHILD(ch, i));
1485 if (!expression)
1486 return NULL;
1487
1488 asdl_seq_SET(keys, i / 4, expression);
1489
1490 expression = ast_for_expr(c, CHILD(ch, i + 2));
1491 if (!expression)
1492 return NULL;
1493
1494 asdl_seq_SET(values, i / 4, expression);
1495 }
1496 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1497 }
1498 }
1499 case BACKQUOTE: { /* repr */
1500 expr_ty expression;
1501 if (Py_Py3kWarningFlag &&
1502 !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1503 return NULL;
1504 expression = ast_for_testlist(c, CHILD(n, 1));
1505 if (!expression)
1506 return NULL;
1507
1508 return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1509 }
1510 default:
1511 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1512 return NULL;
1513 }
1514 }
1515
1516 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1517 ast_for_slice(struct compiling *c, const node *n)
1518 {
1519 node *ch;
1520 expr_ty lower = NULL, upper = NULL, step = NULL;
1521
1522 REQ(n, subscript);
1523
1524 /*
1525 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1526 sliceop: ':' [test]
1527 */
1528 ch = CHILD(n, 0);
1529 if (TYPE(ch) == DOT)
1530 return Ellipsis(c->c_arena);
1531
1532 if (NCH(n) == 1 && TYPE(ch) == test) {
1533 /* 'step' variable hold no significance in terms of being used over
1534 other vars */
1535 step = ast_for_expr(c, ch);
1536 if (!step)
1537 return NULL;
1538
1539 return Index(step, c->c_arena);
1540 }
1541
1542 if (TYPE(ch) == test) {
1543 lower = ast_for_expr(c, ch);
1544 if (!lower)
1545 return NULL;
1546 }
1547
1548 /* If there's an upper bound it's in the second or third position. */
1549 if (TYPE(ch) == COLON) {
1550 if (NCH(n) > 1) {
1551 node *n2 = CHILD(n, 1);
1552
1553 if (TYPE(n2) == test) {
1554 upper = ast_for_expr(c, n2);
1555 if (!upper)
1556 return NULL;
1557 }
1558 }
1559 } else if (NCH(n) > 2) {
1560 node *n2 = CHILD(n, 2);
1561
1562 if (TYPE(n2) == test) {
1563 upper = ast_for_expr(c, n2);
1564 if (!upper)
1565 return NULL;
1566 }
1567 }
1568
1569 ch = CHILD(n, NCH(n) - 1);
1570 if (TYPE(ch) == sliceop) {
1571 if (NCH(ch) == 1) {
1572 /*
1573 This is an extended slice (ie "x[::]") with no expression in the
1574 step field. We set this literally to "None" in order to
1575 disambiguate it from x[:]. (The interpreter might have to call
1576 __getslice__ for x[:], but it must call __getitem__ for x[::].)
1577 */
1578 identifier none = new_identifier("None", c->c_arena);
1579 if (!none)
1580 return NULL;
1581 ch = CHILD(ch, 0);
1582 step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1583 if (!step)
1584 return NULL;
1585 } else {
1586 ch = CHILD(ch, 1);
1587 if (TYPE(ch) == test) {
1588 step = ast_for_expr(c, ch);
1589 if (!step)
1590 return NULL;
1591 }
1592 }
1593 }
1594
1595 return Slice(lower, upper, step, c->c_arena);
1596 }
1597
1598 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1599 ast_for_binop(struct compiling *c, const node *n)
1600 {
1601 /* Must account for a sequence of expressions.
1602 How should A op B op C by represented?
1603 BinOp(BinOp(A, op, B), op, C).
1604 */
1605
1606 int i, nops;
1607 expr_ty expr1, expr2, result;
1608 operator_ty newoperator;
1609
1610 expr1 = ast_for_expr(c, CHILD(n, 0));
1611 if (!expr1)
1612 return NULL;
1613
1614 expr2 = ast_for_expr(c, CHILD(n, 2));
1615 if (!expr2)
1616 return NULL;
1617
1618 newoperator = get_operator(CHILD(n, 1));
1619 if (!newoperator)
1620 return NULL;
1621
1622 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1623 c->c_arena);
1624 if (!result)
1625 return NULL;
1626
1627 nops = (NCH(n) - 1) / 2;
1628 for (i = 1; i < nops; i++) {
1629 expr_ty tmp_result, tmp;
1630 const node* next_oper = CHILD(n, i * 2 + 1);
1631
1632 newoperator = get_operator(next_oper);
1633 if (!newoperator)
1634 return NULL;
1635
1636 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1637 if (!tmp)
1638 return NULL;
1639
1640 tmp_result = BinOp(result, newoperator, tmp,
1641 LINENO(next_oper), next_oper->n_col_offset,
1642 c->c_arena);
1643 if (!tmp_result)
1644 return NULL;
1645 result = tmp_result;
1646 }
1647 return result;
1648 }
1649
1650 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1651 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1652 {
1653 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1654 subscriptlist: subscript (',' subscript)* [',']
1655 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1656 */
1657 REQ(n, trailer);
1658 if (TYPE(CHILD(n, 0)) == LPAR) {
1659 if (NCH(n) == 2)
1660 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1661 n->n_col_offset, c->c_arena);
1662 else
1663 return ast_for_call(c, CHILD(n, 1), left_expr);
1664 }
1665 else if (TYPE(CHILD(n, 0)) == DOT ) {
1666 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1667 if (!attr_id)
1668 return NULL;
1669 return Attribute(left_expr, attr_id, Load,
1670 LINENO(n), n->n_col_offset, c->c_arena);
1671 }
1672 else {
1673 REQ(CHILD(n, 0), LSQB);
1674 REQ(CHILD(n, 2), RSQB);
1675 n = CHILD(n, 1);
1676 if (NCH(n) == 1) {
1677 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1678 if (!slc)
1679 return NULL;
1680 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1681 c->c_arena);
1682 }
1683 else {
1684 /* The grammar is ambiguous here. The ambiguity is resolved
1685 by treating the sequence as a tuple literal if there are
1686 no slice features.
1687 */
1688 int j;
1689 slice_ty slc;
1690 expr_ty e;
1691 bool simple = true;
1692 asdl_seq *slices, *elts;
1693 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1694 if (!slices)
1695 return NULL;
1696 for (j = 0; j < NCH(n); j += 2) {
1697 slc = ast_for_slice(c, CHILD(n, j));
1698 if (!slc)
1699 return NULL;
1700 if (slc->kind != Index_kind)
1701 simple = false;
1702 asdl_seq_SET(slices, j / 2, slc);
1703 }
1704 if (!simple) {
1705 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1706 Load, LINENO(n), n->n_col_offset, c->c_arena);
1707 }
1708 /* extract Index values and put them in a Tuple */
1709 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1710 if (!elts)
1711 return NULL;
1712 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1713 slc = (slice_ty)asdl_seq_GET(slices, j);
1714 assert(slc->kind == Index_kind && slc->v.Index.value);
1715 asdl_seq_SET(elts, j, slc->v.Index.value);
1716 }
1717 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1718 if (!e)
1719 return NULL;
1720 return Subscript(left_expr, Index(e, c->c_arena),
1721 Load, LINENO(n), n->n_col_offset, c->c_arena);
1722 }
1723 }
1724 }
1725
1726 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1727 ast_for_factor(struct compiling *c, const node *n)
1728 {
1729 node *pfactor, *ppower, *patom, *pnum;
1730 expr_ty expression;
1731
1732 /* If the unary - operator is applied to a constant, don't generate
1733 a UNARY_NEGATIVE opcode. Just store the approriate value as a
1734 constant. The peephole optimizer already does something like
1735 this but it doesn't handle the case where the constant is
1736 (sys.maxint - 1). In that case, we want a PyIntObject, not a
1737 PyLongObject.
1738 */
1739 if (TYPE(CHILD(n, 0)) == MINUS &&
1740 NCH(n) == 2 &&
1741 TYPE((pfactor = CHILD(n, 1))) == factor &&
1742 NCH(pfactor) == 1 &&
1743 TYPE((ppower = CHILD(pfactor, 0))) == power &&
1744 NCH(ppower) == 1 &&
1745 TYPE((patom = CHILD(ppower, 0))) == atom &&
1746 TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1747 PyObject *pynum;
1748 char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1749 if (s == NULL)
1750 return NULL;
1751 s[0] = '-';
1752 strcpy(s + 1, STR(pnum));
1753 pynum = parsenumber(c, s);
1754 PyObject_FREE(s);
1755 if (!pynum)
1756 return NULL;
1757
1758 PyArena_AddPyObject(c->c_arena, pynum);
1759 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1760 }
1761
1762 expression = ast_for_expr(c, CHILD(n, 1));
1763 if (!expression)
1764 return NULL;
1765
1766 switch (TYPE(CHILD(n, 0))) {
1767 case PLUS:
1768 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1769 c->c_arena);
1770 case MINUS:
1771 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1772 c->c_arena);
1773 case TILDE:
1774 return UnaryOp(Invert, expression, LINENO(n),
1775 n->n_col_offset, c->c_arena);
1776 }
1777 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1778 TYPE(CHILD(n, 0)));
1779 return NULL;
1780 }
1781
1782 static expr_ty
ast_for_power(struct compiling * c,const node * n)1783 ast_for_power(struct compiling *c, const node *n)
1784 {
1785 /* power: atom trailer* ('**' factor)*
1786 */
1787 int i;
1788 expr_ty e, tmp;
1789 REQ(n, power);
1790 e = ast_for_atom(c, CHILD(n, 0));
1791 if (!e)
1792 return NULL;
1793 if (NCH(n) == 1)
1794 return e;
1795 for (i = 1; i < NCH(n); i++) {
1796 node *ch = CHILD(n, i);
1797 if (TYPE(ch) != trailer)
1798 break;
1799 tmp = ast_for_trailer(c, ch, e);
1800 if (!tmp)
1801 return NULL;
1802 tmp->lineno = e->lineno;
1803 tmp->col_offset = e->col_offset;
1804 e = tmp;
1805 }
1806 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1807 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1808 if (!f)
1809 return NULL;
1810 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1811 if (!tmp)
1812 return NULL;
1813 e = tmp;
1814 }
1815 return e;
1816 }
1817
1818 /* Do not name a variable 'expr'! Will cause a compile error.
1819 */
1820
1821 static expr_ty
ast_for_expr(struct compiling * c,const node * n)1822 ast_for_expr(struct compiling *c, const node *n)
1823 {
1824 /* handle the full range of simple expressions
1825 test: or_test ['if' or_test 'else' test] | lambdef
1826 or_test: and_test ('or' and_test)*
1827 and_test: not_test ('and' not_test)*
1828 not_test: 'not' not_test | comparison
1829 comparison: expr (comp_op expr)*
1830 expr: xor_expr ('|' xor_expr)*
1831 xor_expr: and_expr ('^' and_expr)*
1832 and_expr: shift_expr ('&' shift_expr)*
1833 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1834 arith_expr: term (('+'|'-') term)*
1835 term: factor (('*'|'/'|'%'|'//') factor)*
1836 factor: ('+'|'-'|'~') factor | power
1837 power: atom trailer* ('**' factor)*
1838
1839 As well as modified versions that exist for backward compatibility,
1840 to explicitly allow:
1841 [ x for x in lambda: 0, lambda: 1 ]
1842 (which would be ambiguous without these extra rules)
1843
1844 old_test: or_test | old_lambdef
1845 old_lambdef: 'lambda' [vararglist] ':' old_test
1846
1847 */
1848
1849 asdl_seq *seq;
1850 int i;
1851
1852 loop:
1853 switch (TYPE(n)) {
1854 case test:
1855 case old_test:
1856 if (TYPE(CHILD(n, 0)) == lambdef ||
1857 TYPE(CHILD(n, 0)) == old_lambdef)
1858 return ast_for_lambdef(c, CHILD(n, 0));
1859 else if (NCH(n) > 1)
1860 return ast_for_ifexpr(c, n);
1861 /* Fallthrough */
1862 case or_test:
1863 case and_test:
1864 if (NCH(n) == 1) {
1865 n = CHILD(n, 0);
1866 goto loop;
1867 }
1868 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1869 if (!seq)
1870 return NULL;
1871 for (i = 0; i < NCH(n); i += 2) {
1872 expr_ty e = ast_for_expr(c, CHILD(n, i));
1873 if (!e)
1874 return NULL;
1875 asdl_seq_SET(seq, i / 2, e);
1876 }
1877 if (!strcmp(STR(CHILD(n, 1)), "and"))
1878 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1879 c->c_arena);
1880 assert(!strcmp(STR(CHILD(n, 1)), "or"));
1881 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1882 case not_test:
1883 if (NCH(n) == 1) {
1884 n = CHILD(n, 0);
1885 goto loop;
1886 }
1887 else {
1888 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1889 if (!expression)
1890 return NULL;
1891
1892 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1893 c->c_arena);
1894 }
1895 case comparison:
1896 if (NCH(n) == 1) {
1897 n = CHILD(n, 0);
1898 goto loop;
1899 }
1900 else {
1901 expr_ty expression;
1902 asdl_int_seq *ops;
1903 asdl_seq *cmps;
1904 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1905 if (!ops)
1906 return NULL;
1907 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1908 if (!cmps) {
1909 return NULL;
1910 }
1911 for (i = 1; i < NCH(n); i += 2) {
1912 cmpop_ty newoperator;
1913
1914 newoperator = ast_for_comp_op(c, CHILD(n, i));
1915 if (!newoperator) {
1916 return NULL;
1917 }
1918
1919 expression = ast_for_expr(c, CHILD(n, i + 1));
1920 if (!expression) {
1921 return NULL;
1922 }
1923
1924 asdl_seq_SET(ops, i / 2, newoperator);
1925 asdl_seq_SET(cmps, i / 2, expression);
1926 }
1927 expression = ast_for_expr(c, CHILD(n, 0));
1928 if (!expression) {
1929 return NULL;
1930 }
1931
1932 return Compare(expression, ops, cmps, LINENO(n),
1933 n->n_col_offset, c->c_arena);
1934 }
1935 break;
1936
1937 /* The next five cases all handle BinOps. The main body of code
1938 is the same in each case, but the switch turned inside out to
1939 reuse the code for each type of operator.
1940 */
1941 case expr:
1942 case xor_expr:
1943 case and_expr:
1944 case shift_expr:
1945 case arith_expr:
1946 case term:
1947 if (NCH(n) == 1) {
1948 n = CHILD(n, 0);
1949 goto loop;
1950 }
1951 return ast_for_binop(c, n);
1952 case yield_expr: {
1953 expr_ty exp = NULL;
1954 if (NCH(n) == 2) {
1955 exp = ast_for_testlist(c, CHILD(n, 1));
1956 if (!exp)
1957 return NULL;
1958 }
1959 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1960 }
1961 case factor:
1962 if (NCH(n) == 1) {
1963 n = CHILD(n, 0);
1964 goto loop;
1965 }
1966 return ast_for_factor(c, n);
1967 case power:
1968 return ast_for_power(c, n);
1969 default:
1970 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1971 return NULL;
1972 }
1973 /* should never get here unless if error is set */
1974 return NULL;
1975 }
1976
1977 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)1978 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1979 {
1980 /*
1981 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1982 | '**' test)
1983 argument: [test '='] test [comp_for] # Really [keyword '='] test
1984 */
1985
1986 int i, nargs, nkeywords, ngens;
1987 asdl_seq *args;
1988 asdl_seq *keywords;
1989 expr_ty vararg = NULL, kwarg = NULL;
1990
1991 REQ(n, arglist);
1992
1993 nargs = 0;
1994 nkeywords = 0;
1995 ngens = 0;
1996 for (i = 0; i < NCH(n); i++) {
1997 node *ch = CHILD(n, i);
1998 if (TYPE(ch) == argument) {
1999 if (NCH(ch) == 1)
2000 nargs++;
2001 else if (TYPE(CHILD(ch, 1)) == comp_for)
2002 ngens++;
2003 else
2004 nkeywords++;
2005 }
2006 }
2007 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2008 ast_error(n, "Generator expression must be parenthesized "
2009 "if not sole argument");
2010 return NULL;
2011 }
2012
2013 if (nargs + nkeywords + ngens > 255) {
2014 ast_error(n, "more than 255 arguments");
2015 return NULL;
2016 }
2017
2018 args = asdl_seq_new(nargs + ngens, c->c_arena);
2019 if (!args)
2020 return NULL;
2021 keywords = asdl_seq_new(nkeywords, c->c_arena);
2022 if (!keywords)
2023 return NULL;
2024 nargs = 0;
2025 nkeywords = 0;
2026 for (i = 0; i < NCH(n); i++) {
2027 node *ch = CHILD(n, i);
2028 if (TYPE(ch) == argument) {
2029 expr_ty e;
2030 if (NCH(ch) == 1) {
2031 if (nkeywords) {
2032 ast_error(CHILD(ch, 0),
2033 "non-keyword arg after keyword arg");
2034 return NULL;
2035 }
2036 if (vararg) {
2037 ast_error(CHILD(ch, 0),
2038 "only named arguments may follow *expression");
2039 return NULL;
2040 }
2041 e = ast_for_expr(c, CHILD(ch, 0));
2042 if (!e)
2043 return NULL;
2044 asdl_seq_SET(args, nargs++, e);
2045 }
2046 else if (TYPE(CHILD(ch, 1)) == comp_for) {
2047 e = ast_for_genexp(c, ch);
2048 if (!e)
2049 return NULL;
2050 asdl_seq_SET(args, nargs++, e);
2051 }
2052 else {
2053 keyword_ty kw;
2054 identifier key;
2055 int k;
2056 char *tmp;
2057
2058 /* CHILD(ch, 0) is test, but must be an identifier? */
2059 e = ast_for_expr(c, CHILD(ch, 0));
2060 if (!e)
2061 return NULL;
2062 /* f(lambda x: x[0] = 3) ends up getting parsed with
2063 * LHS test = lambda x: x[0], and RHS test = 3.
2064 * SF bug 132313 points out that complaining about a keyword
2065 * then is very confusing.
2066 */
2067 if (e->kind == Lambda_kind) {
2068 ast_error(CHILD(ch, 0),
2069 "lambda cannot contain assignment");
2070 return NULL;
2071 } else if (e->kind != Name_kind) {
2072 ast_error(CHILD(ch, 0), "keyword can't be an expression");
2073 return NULL;
2074 }
2075 key = e->v.Name.id;
2076 if (!forbidden_check(c, CHILD(ch, 0), PyBytes_AS_STRING(key)))
2077 return NULL;
2078 for (k = 0; k < nkeywords; k++) {
2079 tmp = PyString_AS_STRING(
2080 ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2081 if (!strcmp(tmp, PyString_AS_STRING(key))) {
2082 ast_error(CHILD(ch, 0), "keyword argument repeated");
2083 return NULL;
2084 }
2085 }
2086 e = ast_for_expr(c, CHILD(ch, 2));
2087 if (!e)
2088 return NULL;
2089 kw = keyword(key, e, c->c_arena);
2090 if (!kw)
2091 return NULL;
2092 asdl_seq_SET(keywords, nkeywords++, kw);
2093 }
2094 }
2095 else if (TYPE(ch) == STAR) {
2096 vararg = ast_for_expr(c, CHILD(n, i+1));
2097 if (!vararg)
2098 return NULL;
2099 i++;
2100 }
2101 else if (TYPE(ch) == DOUBLESTAR) {
2102 kwarg = ast_for_expr(c, CHILD(n, i+1));
2103 if (!kwarg)
2104 return NULL;
2105 i++;
2106 }
2107 }
2108
2109 return Call(func, args, keywords, vararg, kwarg, func->lineno,
2110 func->col_offset, c->c_arena);
2111 }
2112
2113 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2114 ast_for_testlist(struct compiling *c, const node* n)
2115 {
2116 /* testlist_comp: test (',' test)* [','] */
2117 /* testlist: test (',' test)* [','] */
2118 /* testlist_safe: test (',' test)+ [','] */
2119 /* testlist1: test (',' test)* */
2120 assert(NCH(n) > 0);
2121 if (TYPE(n) == testlist_comp) {
2122 if (NCH(n) > 1)
2123 assert(TYPE(CHILD(n, 1)) != comp_for);
2124 }
2125 else {
2126 assert(TYPE(n) == testlist ||
2127 TYPE(n) == testlist_safe ||
2128 TYPE(n) == testlist1);
2129 }
2130 if (NCH(n) == 1)
2131 return ast_for_expr(c, CHILD(n, 0));
2132 else {
2133 asdl_seq *tmp = seq_for_testlist(c, n);
2134 if (!tmp)
2135 return NULL;
2136 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2137 }
2138 }
2139
2140 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2141 ast_for_testlist_comp(struct compiling *c, const node* n)
2142 {
2143 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2144 /* argument: test [ comp_for ] */
2145 assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2146 if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2147 return ast_for_genexp(c, n);
2148 return ast_for_testlist(c, n);
2149 }
2150
2151 /* like ast_for_testlist() but returns a sequence */
2152 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2153 ast_for_class_bases(struct compiling *c, const node* n)
2154 {
2155 /* testlist: test (',' test)* [','] */
2156 assert(NCH(n) > 0);
2157 REQ(n, testlist);
2158 if (NCH(n) == 1) {
2159 expr_ty base;
2160 asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2161 if (!bases)
2162 return NULL;
2163 base = ast_for_expr(c, CHILD(n, 0));
2164 if (!base)
2165 return NULL;
2166 asdl_seq_SET(bases, 0, base);
2167 return bases;
2168 }
2169
2170 return seq_for_testlist(c, n);
2171 }
2172
2173 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2174 ast_for_expr_stmt(struct compiling *c, const node *n)
2175 {
2176 REQ(n, expr_stmt);
2177 /* expr_stmt: testlist (augassign (yield_expr|testlist)
2178 | ('=' (yield_expr|testlist))*)
2179 testlist: test (',' test)* [',']
2180 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2181 | '<<=' | '>>=' | '**=' | '//='
2182 test: ... here starts the operator precedence dance
2183 */
2184
2185 if (NCH(n) == 1) {
2186 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2187 if (!e)
2188 return NULL;
2189
2190 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2191 }
2192 else if (TYPE(CHILD(n, 1)) == augassign) {
2193 expr_ty expr1, expr2;
2194 operator_ty newoperator;
2195 node *ch = CHILD(n, 0);
2196
2197 expr1 = ast_for_testlist(c, ch);
2198 if (!expr1)
2199 return NULL;
2200 if(!set_context(c, expr1, Store, ch))
2201 return NULL;
2202 /* set_context checks that most expressions are not the left side.
2203 Augmented assignments can only have a name, a subscript, or an
2204 attribute on the left, though, so we have to explicitly check for
2205 those. */
2206 switch (expr1->kind) {
2207 case Name_kind:
2208 case Attribute_kind:
2209 case Subscript_kind:
2210 break;
2211 default:
2212 ast_error(ch, "illegal expression for augmented assignment");
2213 return NULL;
2214 }
2215
2216 ch = CHILD(n, 2);
2217 if (TYPE(ch) == testlist)
2218 expr2 = ast_for_testlist(c, ch);
2219 else
2220 expr2 = ast_for_expr(c, ch);
2221 if (!expr2)
2222 return NULL;
2223
2224 newoperator = ast_for_augassign(c, CHILD(n, 1));
2225 if (!newoperator)
2226 return NULL;
2227
2228 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2229 c->c_arena);
2230 }
2231 else {
2232 int i;
2233 asdl_seq *targets;
2234 node *value;
2235 expr_ty expression;
2236
2237 /* a normal assignment */
2238 REQ(CHILD(n, 1), EQUAL);
2239 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2240 if (!targets)
2241 return NULL;
2242 for (i = 0; i < NCH(n) - 2; i += 2) {
2243 expr_ty e;
2244 node *ch = CHILD(n, i);
2245 if (TYPE(ch) == yield_expr) {
2246 ast_error(ch, "assignment to yield expression not possible");
2247 return NULL;
2248 }
2249 e = ast_for_testlist(c, ch);
2250 if (!e)
2251 return NULL;
2252
2253 /* set context to assign */
2254 if (!set_context(c, e, Store, CHILD(n, i)))
2255 return NULL;
2256
2257 asdl_seq_SET(targets, i / 2, e);
2258 }
2259 value = CHILD(n, NCH(n) - 1);
2260 if (TYPE(value) == testlist)
2261 expression = ast_for_testlist(c, value);
2262 else
2263 expression = ast_for_expr(c, value);
2264 if (!expression)
2265 return NULL;
2266 return Assign(targets, expression, LINENO(n), n->n_col_offset,
2267 c->c_arena);
2268 }
2269 }
2270
2271 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2272 ast_for_print_stmt(struct compiling *c, const node *n)
2273 {
2274 /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2275 | '>>' test [ (',' test)+ [','] ] )
2276 */
2277 expr_ty dest = NULL, expression;
2278 asdl_seq *seq = NULL;
2279 bool nl;
2280 int i, j, values_count, start = 1;
2281
2282 REQ(n, print_stmt);
2283 if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2284 dest = ast_for_expr(c, CHILD(n, 2));
2285 if (!dest)
2286 return NULL;
2287 start = 4;
2288 }
2289 values_count = (NCH(n) + 1 - start) / 2;
2290 if (values_count) {
2291 seq = asdl_seq_new(values_count, c->c_arena);
2292 if (!seq)
2293 return NULL;
2294 for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2295 expression = ast_for_expr(c, CHILD(n, i));
2296 if (!expression)
2297 return NULL;
2298 asdl_seq_SET(seq, j, expression);
2299 }
2300 }
2301 nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2302 return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2303 }
2304
2305 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2306 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2307 {
2308 asdl_seq *seq;
2309 int i;
2310 expr_ty e;
2311
2312 REQ(n, exprlist);
2313
2314 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2315 if (!seq)
2316 return NULL;
2317 for (i = 0; i < NCH(n); i += 2) {
2318 e = ast_for_expr(c, CHILD(n, i));
2319 if (!e)
2320 return NULL;
2321 asdl_seq_SET(seq, i / 2, e);
2322 if (context && !set_context(c, e, context, CHILD(n, i)))
2323 return NULL;
2324 }
2325 return seq;
2326 }
2327
2328 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2329 ast_for_del_stmt(struct compiling *c, const node *n)
2330 {
2331 asdl_seq *expr_list;
2332
2333 /* del_stmt: 'del' exprlist */
2334 REQ(n, del_stmt);
2335
2336 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2337 if (!expr_list)
2338 return NULL;
2339 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2340 }
2341
2342 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2343 ast_for_flow_stmt(struct compiling *c, const node *n)
2344 {
2345 /*
2346 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2347 | yield_stmt
2348 break_stmt: 'break'
2349 continue_stmt: 'continue'
2350 return_stmt: 'return' [testlist]
2351 yield_stmt: yield_expr
2352 yield_expr: 'yield' testlist
2353 raise_stmt: 'raise' [test [',' test [',' test]]]
2354 */
2355 node *ch;
2356
2357 REQ(n, flow_stmt);
2358 ch = CHILD(n, 0);
2359 switch (TYPE(ch)) {
2360 case break_stmt:
2361 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2362 case continue_stmt:
2363 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2364 case yield_stmt: { /* will reduce to yield_expr */
2365 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2366 if (!exp)
2367 return NULL;
2368 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2369 }
2370 case return_stmt:
2371 if (NCH(ch) == 1)
2372 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2373 else {
2374 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2375 if (!expression)
2376 return NULL;
2377 return Return(expression, LINENO(n), n->n_col_offset,
2378 c->c_arena);
2379 }
2380 case raise_stmt:
2381 if (NCH(ch) == 1)
2382 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2383 c->c_arena);
2384 else if (NCH(ch) == 2) {
2385 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2386 if (!expression)
2387 return NULL;
2388 return Raise(expression, NULL, NULL, LINENO(n),
2389 n->n_col_offset, c->c_arena);
2390 }
2391 else if (NCH(ch) == 4) {
2392 expr_ty expr1, expr2;
2393
2394 expr1 = ast_for_expr(c, CHILD(ch, 1));
2395 if (!expr1)
2396 return NULL;
2397 expr2 = ast_for_expr(c, CHILD(ch, 3));
2398 if (!expr2)
2399 return NULL;
2400
2401 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2402 c->c_arena);
2403 }
2404 else if (NCH(ch) == 6) {
2405 expr_ty expr1, expr2, expr3;
2406
2407 expr1 = ast_for_expr(c, CHILD(ch, 1));
2408 if (!expr1)
2409 return NULL;
2410 expr2 = ast_for_expr(c, CHILD(ch, 3));
2411 if (!expr2)
2412 return NULL;
2413 expr3 = ast_for_expr(c, CHILD(ch, 5));
2414 if (!expr3)
2415 return NULL;
2416
2417 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2418 c->c_arena);
2419 }
2420 default:
2421 PyErr_Format(PyExc_SystemError,
2422 "unexpected flow_stmt: %d", TYPE(ch));
2423 return NULL;
2424 }
2425
2426 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2427 return NULL;
2428 }
2429
2430 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2431 alias_for_import_name(struct compiling *c, const node *n, int store)
2432 {
2433 /*
2434 import_as_name: NAME ['as' NAME]
2435 dotted_as_name: dotted_name ['as' NAME]
2436 dotted_name: NAME ('.' NAME)*
2437 */
2438 PyObject *str, *name;
2439
2440 loop:
2441 switch (TYPE(n)) {
2442 case import_as_name: {
2443 node *name_node = CHILD(n, 0);
2444 str = NULL;
2445 if (NCH(n) == 3) {
2446 node *str_node = CHILD(n, 2);
2447 if (store && !forbidden_check(c, str_node, STR(str_node)))
2448 return NULL;
2449 str = NEW_IDENTIFIER(str_node);
2450 if (!str)
2451 return NULL;
2452 }
2453 else {
2454 if (!forbidden_check(c, name_node, STR(name_node)))
2455 return NULL;
2456 }
2457 name = NEW_IDENTIFIER(name_node);
2458 if (!name)
2459 return NULL;
2460 return alias(name, str, c->c_arena);
2461 }
2462 case dotted_as_name:
2463 if (NCH(n) == 1) {
2464 n = CHILD(n, 0);
2465 goto loop;
2466 }
2467 else {
2468 node *asname_node = CHILD(n, 2);
2469 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2470 if (!a)
2471 return NULL;
2472 assert(!a->asname);
2473 if (!forbidden_check(c, asname_node, STR(asname_node)))
2474 return NULL;
2475 a->asname = NEW_IDENTIFIER(asname_node);
2476 if (!a->asname)
2477 return NULL;
2478 return a;
2479 }
2480 break;
2481 case dotted_name:
2482 if (NCH(n) == 1) {
2483 node *name_node = CHILD(n, 0);
2484 if (store && !forbidden_check(c, name_node, STR(name_node)))
2485 return NULL;
2486 name = NEW_IDENTIFIER(name_node);
2487 if (!name)
2488 return NULL;
2489 return alias(name, NULL, c->c_arena);
2490 }
2491 else {
2492 /* Create a string of the form "a.b.c" */
2493 int i;
2494 size_t len;
2495 char *s;
2496
2497 len = 0;
2498 for (i = 0; i < NCH(n); i += 2)
2499 /* length of string plus one for the dot */
2500 len += strlen(STR(CHILD(n, i))) + 1;
2501 len--; /* the last name doesn't have a dot */
2502 str = PyString_FromStringAndSize(NULL, len);
2503 if (!str)
2504 return NULL;
2505 s = PyString_AS_STRING(str);
2506 if (!s)
2507 return NULL;
2508 for (i = 0; i < NCH(n); i += 2) {
2509 char *sch = STR(CHILD(n, i));
2510 strcpy(s, STR(CHILD(n, i)));
2511 s += strlen(sch);
2512 *s++ = '.';
2513 }
2514 --s;
2515 *s = '\0';
2516 PyString_InternInPlace(&str);
2517 PyArena_AddPyObject(c->c_arena, str);
2518 return alias(str, NULL, c->c_arena);
2519 }
2520 break;
2521 case STAR:
2522 str = PyString_InternFromString("*");
2523 if (!str)
2524 return NULL;
2525 PyArena_AddPyObject(c->c_arena, str);
2526 return alias(str, NULL, c->c_arena);
2527 default:
2528 PyErr_Format(PyExc_SystemError,
2529 "unexpected import name: %d", TYPE(n));
2530 return NULL;
2531 }
2532
2533 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2534 return NULL;
2535 }
2536
2537 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)2538 ast_for_import_stmt(struct compiling *c, const node *n)
2539 {
2540 /*
2541 import_stmt: import_name | import_from
2542 import_name: 'import' dotted_as_names
2543 import_from: 'from' ('.'* dotted_name | '.') 'import'
2544 ('*' | '(' import_as_names ')' | import_as_names)
2545 */
2546 int lineno;
2547 int col_offset;
2548 int i;
2549 asdl_seq *aliases;
2550
2551 REQ(n, import_stmt);
2552 lineno = LINENO(n);
2553 col_offset = n->n_col_offset;
2554 n = CHILD(n, 0);
2555 if (TYPE(n) == import_name) {
2556 n = CHILD(n, 1);
2557 REQ(n, dotted_as_names);
2558 aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2559 if (!aliases)
2560 return NULL;
2561 for (i = 0; i < NCH(n); i += 2) {
2562 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2563 if (!import_alias)
2564 return NULL;
2565 asdl_seq_SET(aliases, i / 2, import_alias);
2566 }
2567 return Import(aliases, lineno, col_offset, c->c_arena);
2568 }
2569 else if (TYPE(n) == import_from) {
2570 int n_children;
2571 int idx, ndots = 0;
2572 alias_ty mod = NULL;
2573 identifier modname = NULL;
2574
2575 /* Count the number of dots (for relative imports) and check for the
2576 optional module name */
2577 for (idx = 1; idx < NCH(n); idx++) {
2578 if (TYPE(CHILD(n, idx)) == dotted_name) {
2579 mod = alias_for_import_name(c, CHILD(n, idx), 0);
2580 if (!mod)
2581 return NULL;
2582 idx++;
2583 break;
2584 } else if (TYPE(CHILD(n, idx)) != DOT) {
2585 break;
2586 }
2587 ndots++;
2588 }
2589 idx++; /* skip over the 'import' keyword */
2590 switch (TYPE(CHILD(n, idx))) {
2591 case STAR:
2592 /* from ... import * */
2593 n = CHILD(n, idx);
2594 n_children = 1;
2595 break;
2596 case LPAR:
2597 /* from ... import (x, y, z) */
2598 n = CHILD(n, idx + 1);
2599 n_children = NCH(n);
2600 break;
2601 case import_as_names:
2602 /* from ... import x, y, z */
2603 n = CHILD(n, idx);
2604 n_children = NCH(n);
2605 if (n_children % 2 == 0) {
2606 ast_error(n, "trailing comma not allowed without"
2607 " surrounding parentheses");
2608 return NULL;
2609 }
2610 break;
2611 default:
2612 ast_error(n, "Unexpected node-type in from-import");
2613 return NULL;
2614 }
2615
2616 aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2617 if (!aliases)
2618 return NULL;
2619
2620 /* handle "from ... import *" special b/c there's no children */
2621 if (TYPE(n) == STAR) {
2622 alias_ty import_alias = alias_for_import_name(c, n, 1);
2623 if (!import_alias)
2624 return NULL;
2625 asdl_seq_SET(aliases, 0, import_alias);
2626 }
2627 else {
2628 for (i = 0; i < NCH(n); i += 2) {
2629 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2630 if (!import_alias)
2631 return NULL;
2632 asdl_seq_SET(aliases, i / 2, import_alias);
2633 }
2634 }
2635 if (mod != NULL)
2636 modname = mod->name;
2637 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2638 c->c_arena);
2639 }
2640 PyErr_Format(PyExc_SystemError,
2641 "unknown import statement: starts with command '%s'",
2642 STR(CHILD(n, 0)));
2643 return NULL;
2644 }
2645
2646 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2647 ast_for_global_stmt(struct compiling *c, const node *n)
2648 {
2649 /* global_stmt: 'global' NAME (',' NAME)* */
2650 identifier name;
2651 asdl_seq *s;
2652 int i;
2653
2654 REQ(n, global_stmt);
2655 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2656 if (!s)
2657 return NULL;
2658 for (i = 1; i < NCH(n); i += 2) {
2659 name = NEW_IDENTIFIER(CHILD(n, i));
2660 if (!name)
2661 return NULL;
2662 asdl_seq_SET(s, i / 2, name);
2663 }
2664 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2665 }
2666
2667 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2668 ast_for_exec_stmt(struct compiling *c, const node *n)
2669 {
2670 expr_ty expr1, globals = NULL, locals = NULL;
2671 int n_children = NCH(n);
2672 if (n_children != 2 && n_children != 4 && n_children != 6) {
2673 PyErr_Format(PyExc_SystemError,
2674 "poorly formed 'exec' statement: %d parts to statement",
2675 n_children);
2676 return NULL;
2677 }
2678
2679 /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2680 REQ(n, exec_stmt);
2681 expr1 = ast_for_expr(c, CHILD(n, 1));
2682 if (!expr1)
2683 return NULL;
2684
2685 if (expr1->kind == Tuple_kind && n_children < 4 &&
2686 (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2687 asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2688 /* Backwards compatibility: passing exec args as a tuple */
2689 globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2690 if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2691 locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2692 }
2693 expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2694 }
2695
2696 if (n_children >= 4) {
2697 globals = ast_for_expr(c, CHILD(n, 3));
2698 if (!globals)
2699 return NULL;
2700 }
2701 if (n_children == 6) {
2702 locals = ast_for_expr(c, CHILD(n, 5));
2703 if (!locals)
2704 return NULL;
2705 }
2706
2707 return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2708 c->c_arena);
2709 }
2710
2711 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2712 ast_for_assert_stmt(struct compiling *c, const node *n)
2713 {
2714 /* assert_stmt: 'assert' test [',' test] */
2715 REQ(n, assert_stmt);
2716 if (NCH(n) == 2) {
2717 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2718 if (!expression)
2719 return NULL;
2720 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2721 c->c_arena);
2722 }
2723 else if (NCH(n) == 4) {
2724 expr_ty expr1, expr2;
2725
2726 expr1 = ast_for_expr(c, CHILD(n, 1));
2727 if (!expr1)
2728 return NULL;
2729 expr2 = ast_for_expr(c, CHILD(n, 3));
2730 if (!expr2)
2731 return NULL;
2732
2733 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2734 }
2735 PyErr_Format(PyExc_SystemError,
2736 "improper number of parts to 'assert' statement: %d",
2737 NCH(n));
2738 return NULL;
2739 }
2740
2741 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2742 ast_for_suite(struct compiling *c, const node *n)
2743 {
2744 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2745 asdl_seq *seq;
2746 stmt_ty s;
2747 int i, total, num, end, pos = 0;
2748 node *ch;
2749
2750 REQ(n, suite);
2751
2752 total = num_stmts(n);
2753 seq = asdl_seq_new(total, c->c_arena);
2754 if (!seq)
2755 return NULL;
2756 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2757 n = CHILD(n, 0);
2758 /* simple_stmt always ends with a NEWLINE,
2759 and may have a trailing SEMI
2760 */
2761 end = NCH(n) - 1;
2762 if (TYPE(CHILD(n, end - 1)) == SEMI)
2763 end--;
2764 /* loop by 2 to skip semi-colons */
2765 for (i = 0; i < end; i += 2) {
2766 ch = CHILD(n, i);
2767 s = ast_for_stmt(c, ch);
2768 if (!s)
2769 return NULL;
2770 asdl_seq_SET(seq, pos++, s);
2771 }
2772 }
2773 else {
2774 for (i = 2; i < (NCH(n) - 1); i++) {
2775 ch = CHILD(n, i);
2776 REQ(ch, stmt);
2777 num = num_stmts(ch);
2778 if (num == 1) {
2779 /* small_stmt or compound_stmt with only one child */
2780 s = ast_for_stmt(c, ch);
2781 if (!s)
2782 return NULL;
2783 asdl_seq_SET(seq, pos++, s);
2784 }
2785 else {
2786 int j;
2787 ch = CHILD(ch, 0);
2788 REQ(ch, simple_stmt);
2789 for (j = 0; j < NCH(ch); j += 2) {
2790 /* statement terminates with a semi-colon ';' */
2791 if (NCH(CHILD(ch, j)) == 0) {
2792 assert((j + 1) == NCH(ch));
2793 break;
2794 }
2795 s = ast_for_stmt(c, CHILD(ch, j));
2796 if (!s)
2797 return NULL;
2798 asdl_seq_SET(seq, pos++, s);
2799 }
2800 }
2801 }
2802 }
2803 assert(pos == seq->size);
2804 return seq;
2805 }
2806
2807 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)2808 ast_for_if_stmt(struct compiling *c, const node *n)
2809 {
2810 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2811 ['else' ':' suite]
2812 */
2813 char *s;
2814
2815 REQ(n, if_stmt);
2816
2817 if (NCH(n) == 4) {
2818 expr_ty expression;
2819 asdl_seq *suite_seq;
2820
2821 expression = ast_for_expr(c, CHILD(n, 1));
2822 if (!expression)
2823 return NULL;
2824 suite_seq = ast_for_suite(c, CHILD(n, 3));
2825 if (!suite_seq)
2826 return NULL;
2827
2828 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2829 c->c_arena);
2830 }
2831
2832 s = STR(CHILD(n, 4));
2833 /* s[2], the third character in the string, will be
2834 's' for el_s_e, or
2835 'i' for el_i_f
2836 */
2837 if (s[2] == 's') {
2838 expr_ty expression;
2839 asdl_seq *seq1, *seq2;
2840
2841 expression = ast_for_expr(c, CHILD(n, 1));
2842 if (!expression)
2843 return NULL;
2844 seq1 = ast_for_suite(c, CHILD(n, 3));
2845 if (!seq1)
2846 return NULL;
2847 seq2 = ast_for_suite(c, CHILD(n, 6));
2848 if (!seq2)
2849 return NULL;
2850
2851 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2852 c->c_arena);
2853 }
2854 else if (s[2] == 'i') {
2855 int i, n_elif, has_else = 0;
2856 expr_ty expression;
2857 asdl_seq *suite_seq;
2858 asdl_seq *orelse = NULL;
2859 n_elif = NCH(n) - 4;
2860 /* must reference the child n_elif+1 since 'else' token is third,
2861 not fourth, child from the end. */
2862 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2863 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2864 has_else = 1;
2865 n_elif -= 3;
2866 }
2867 n_elif /= 4;
2868
2869 if (has_else) {
2870 asdl_seq *suite_seq2;
2871
2872 orelse = asdl_seq_new(1, c->c_arena);
2873 if (!orelse)
2874 return NULL;
2875 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2876 if (!expression)
2877 return NULL;
2878 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2879 if (!suite_seq)
2880 return NULL;
2881 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2882 if (!suite_seq2)
2883 return NULL;
2884
2885 asdl_seq_SET(orelse, 0,
2886 If(expression, suite_seq, suite_seq2,
2887 LINENO(CHILD(n, NCH(n) - 6)),
2888 CHILD(n, NCH(n) - 6)->n_col_offset,
2889 c->c_arena));
2890 /* the just-created orelse handled the last elif */
2891 n_elif--;
2892 }
2893
2894 for (i = 0; i < n_elif; i++) {
2895 int off = 5 + (n_elif - i - 1) * 4;
2896 asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2897 if (!newobj)
2898 return NULL;
2899 expression = ast_for_expr(c, CHILD(n, off));
2900 if (!expression)
2901 return NULL;
2902 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2903 if (!suite_seq)
2904 return NULL;
2905
2906 asdl_seq_SET(newobj, 0,
2907 If(expression, suite_seq, orelse,
2908 LINENO(CHILD(n, off)),
2909 CHILD(n, off)->n_col_offset, c->c_arena));
2910 orelse = newobj;
2911 }
2912 expression = ast_for_expr(c, CHILD(n, 1));
2913 if (!expression)
2914 return NULL;
2915 suite_seq = ast_for_suite(c, CHILD(n, 3));
2916 if (!suite_seq)
2917 return NULL;
2918 return If(expression, suite_seq, orelse,
2919 LINENO(n), n->n_col_offset, c->c_arena);
2920 }
2921
2922 PyErr_Format(PyExc_SystemError,
2923 "unexpected token in 'if' statement: %s", s);
2924 return NULL;
2925 }
2926
2927 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)2928 ast_for_while_stmt(struct compiling *c, const node *n)
2929 {
2930 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2931 REQ(n, while_stmt);
2932
2933 if (NCH(n) == 4) {
2934 expr_ty expression;
2935 asdl_seq *suite_seq;
2936
2937 expression = ast_for_expr(c, CHILD(n, 1));
2938 if (!expression)
2939 return NULL;
2940 suite_seq = ast_for_suite(c, CHILD(n, 3));
2941 if (!suite_seq)
2942 return NULL;
2943 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2944 c->c_arena);
2945 }
2946 else if (NCH(n) == 7) {
2947 expr_ty expression;
2948 asdl_seq *seq1, *seq2;
2949
2950 expression = ast_for_expr(c, CHILD(n, 1));
2951 if (!expression)
2952 return NULL;
2953 seq1 = ast_for_suite(c, CHILD(n, 3));
2954 if (!seq1)
2955 return NULL;
2956 seq2 = ast_for_suite(c, CHILD(n, 6));
2957 if (!seq2)
2958 return NULL;
2959
2960 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2961 c->c_arena);
2962 }
2963
2964 PyErr_Format(PyExc_SystemError,
2965 "wrong number of tokens for 'while' statement: %d",
2966 NCH(n));
2967 return NULL;
2968 }
2969
2970 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)2971 ast_for_for_stmt(struct compiling *c, const node *n)
2972 {
2973 asdl_seq *_target, *seq = NULL, *suite_seq;
2974 expr_ty expression;
2975 expr_ty target, first;
2976 const node *node_target;
2977 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2978 REQ(n, for_stmt);
2979
2980 if (NCH(n) == 9) {
2981 seq = ast_for_suite(c, CHILD(n, 8));
2982 if (!seq)
2983 return NULL;
2984 }
2985
2986 node_target = CHILD(n, 1);
2987 _target = ast_for_exprlist(c, node_target, Store);
2988 if (!_target)
2989 return NULL;
2990 /* Check the # of children rather than the length of _target, since
2991 for x, in ... has 1 element in _target, but still requires a Tuple. */
2992 first = (expr_ty)asdl_seq_GET(_target, 0);
2993 if (NCH(node_target) == 1)
2994 target = first;
2995 else
2996 target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
2997
2998 expression = ast_for_testlist(c, CHILD(n, 3));
2999 if (!expression)
3000 return NULL;
3001 suite_seq = ast_for_suite(c, CHILD(n, 5));
3002 if (!suite_seq)
3003 return NULL;
3004
3005 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
3006 c->c_arena);
3007 }
3008
3009 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3010 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3011 {
3012 /* except_clause: 'except' [test [(',' | 'as') test]] */
3013 REQ(exc, except_clause);
3014 REQ(body, suite);
3015
3016 if (NCH(exc) == 1) {
3017 asdl_seq *suite_seq = ast_for_suite(c, body);
3018 if (!suite_seq)
3019 return NULL;
3020
3021 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3022 exc->n_col_offset, c->c_arena);
3023 }
3024 else if (NCH(exc) == 2) {
3025 expr_ty expression;
3026 asdl_seq *suite_seq;
3027
3028 expression = ast_for_expr(c, CHILD(exc, 1));
3029 if (!expression)
3030 return NULL;
3031 suite_seq = ast_for_suite(c, body);
3032 if (!suite_seq)
3033 return NULL;
3034
3035 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3036 exc->n_col_offset, c->c_arena);
3037 }
3038 else if (NCH(exc) == 4) {
3039 asdl_seq *suite_seq;
3040 expr_ty expression;
3041 expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3042 if (!e)
3043 return NULL;
3044 if (!set_context(c, e, Store, CHILD(exc, 3)))
3045 return NULL;
3046 expression = ast_for_expr(c, CHILD(exc, 1));
3047 if (!expression)
3048 return NULL;
3049 suite_seq = ast_for_suite(c, body);
3050 if (!suite_seq)
3051 return NULL;
3052
3053 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3054 exc->n_col_offset, c->c_arena);
3055 }
3056
3057 PyErr_Format(PyExc_SystemError,
3058 "wrong number of children for 'except' clause: %d",
3059 NCH(exc));
3060 return NULL;
3061 }
3062
3063 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3064 ast_for_try_stmt(struct compiling *c, const node *n)
3065 {
3066 const int nch = NCH(n);
3067 int n_except = (nch - 3)/3;
3068 asdl_seq *body, *orelse = NULL, *finally = NULL;
3069
3070 REQ(n, try_stmt);
3071
3072 body = ast_for_suite(c, CHILD(n, 2));
3073 if (body == NULL)
3074 return NULL;
3075
3076 if (TYPE(CHILD(n, nch - 3)) == NAME) {
3077 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3078 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3079 /* we can assume it's an "else",
3080 because nch >= 9 for try-else-finally and
3081 it would otherwise have a type of except_clause */
3082 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3083 if (orelse == NULL)
3084 return NULL;
3085 n_except--;
3086 }
3087
3088 finally = ast_for_suite(c, CHILD(n, nch - 1));
3089 if (finally == NULL)
3090 return NULL;
3091 n_except--;
3092 }
3093 else {
3094 /* we can assume it's an "else",
3095 otherwise it would have a type of except_clause */
3096 orelse = ast_for_suite(c, CHILD(n, nch - 1));
3097 if (orelse == NULL)
3098 return NULL;
3099 n_except--;
3100 }
3101 }
3102 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3103 ast_error(n, "malformed 'try' statement");
3104 return NULL;
3105 }
3106
3107 if (n_except > 0) {
3108 int i;
3109 stmt_ty except_st;
3110 /* process except statements to create a try ... except */
3111 asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
3112 if (handlers == NULL)
3113 return NULL;
3114
3115 for (i = 0; i < n_except; i++) {
3116 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3117 CHILD(n, 5 + i * 3));
3118 if (!e)
3119 return NULL;
3120 asdl_seq_SET(handlers, i, e);
3121 }
3122
3123 except_st = TryExcept(body, handlers, orelse, LINENO(n),
3124 n->n_col_offset, c->c_arena);
3125 if (!finally)
3126 return except_st;
3127
3128 /* if a 'finally' is present too, we nest the TryExcept within a
3129 TryFinally to emulate try ... except ... finally */
3130 body = asdl_seq_new(1, c->c_arena);
3131 if (body == NULL)
3132 return NULL;
3133 asdl_seq_SET(body, 0, except_st);
3134 }
3135
3136 /* must be a try ... finally (except clauses are in body, if any exist) */
3137 assert(finally != NULL);
3138 return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
3139 }
3140
3141 /* with_item: test ['as' expr] */
3142 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content)3143 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
3144 {
3145 expr_ty context_expr, optional_vars = NULL;
3146
3147 REQ(n, with_item);
3148 context_expr = ast_for_expr(c, CHILD(n, 0));
3149 if (!context_expr)
3150 return NULL;
3151 if (NCH(n) == 3) {
3152 optional_vars = ast_for_expr(c, CHILD(n, 2));
3153
3154 if (!optional_vars) {
3155 return NULL;
3156 }
3157 if (!set_context(c, optional_vars, Store, n)) {
3158 return NULL;
3159 }
3160 }
3161
3162 return With(context_expr, optional_vars, content, LINENO(n),
3163 n->n_col_offset, c->c_arena);
3164 }
3165
3166 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3167 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3168 ast_for_with_stmt(struct compiling *c, const node *n)
3169 {
3170 int i;
3171 stmt_ty ret;
3172 asdl_seq *inner;
3173
3174 REQ(n, with_stmt);
3175
3176 /* process the with items inside-out */
3177 i = NCH(n) - 1;
3178 /* the suite of the innermost with item is the suite of the with stmt */
3179 inner = ast_for_suite(c, CHILD(n, i));
3180 if (!inner)
3181 return NULL;
3182
3183 for (;;) {
3184 i -= 2;
3185 ret = ast_for_with_item(c, CHILD(n, i), inner);
3186 if (!ret)
3187 return NULL;
3188 /* was this the last item? */
3189 if (i == 1)
3190 break;
3191 /* if not, wrap the result so far in a new sequence */
3192 inner = asdl_seq_new(1, c->c_arena);
3193 if (!inner)
3194 return NULL;
3195 asdl_seq_SET(inner, 0, ret);
3196 }
3197
3198 return ret;
3199 }
3200
3201 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3202 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3203 {
3204 /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3205 PyObject *classname;
3206 asdl_seq *bases, *s;
3207
3208 REQ(n, classdef);
3209
3210 if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3211 return NULL;
3212
3213 if (NCH(n) == 4) {
3214 s = ast_for_suite(c, CHILD(n, 3));
3215 if (!s)
3216 return NULL;
3217 classname = NEW_IDENTIFIER(CHILD(n, 1));
3218 if (!classname)
3219 return NULL;
3220 return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3221 n->n_col_offset, c->c_arena);
3222 }
3223 /* check for empty base list */
3224 if (TYPE(CHILD(n,3)) == RPAR) {
3225 s = ast_for_suite(c, CHILD(n,5));
3226 if (!s)
3227 return NULL;
3228 classname = NEW_IDENTIFIER(CHILD(n, 1));
3229 if (!classname)
3230 return NULL;
3231 return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3232 n->n_col_offset, c->c_arena);
3233 }
3234
3235 /* else handle the base class list */
3236 bases = ast_for_class_bases(c, CHILD(n, 3));
3237 if (!bases)
3238 return NULL;
3239
3240 s = ast_for_suite(c, CHILD(n, 6));
3241 if (!s)
3242 return NULL;
3243 classname = NEW_IDENTIFIER(CHILD(n, 1));
3244 if (!classname)
3245 return NULL;
3246 return ClassDef(classname, bases, s, decorator_seq,
3247 LINENO(n), n->n_col_offset, c->c_arena);
3248 }
3249
3250 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3251 ast_for_stmt(struct compiling *c, const node *n)
3252 {
3253 if (TYPE(n) == stmt) {
3254 assert(NCH(n) == 1);
3255 n = CHILD(n, 0);
3256 }
3257 if (TYPE(n) == simple_stmt) {
3258 assert(num_stmts(n) == 1);
3259 n = CHILD(n, 0);
3260 }
3261 if (TYPE(n) == small_stmt) {
3262 n = CHILD(n, 0);
3263 /* small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt
3264 | flow_stmt | import_stmt | global_stmt | exec_stmt
3265 | assert_stmt
3266 */
3267 switch (TYPE(n)) {
3268 case expr_stmt:
3269 return ast_for_expr_stmt(c, n);
3270 case print_stmt:
3271 return ast_for_print_stmt(c, n);
3272 case del_stmt:
3273 return ast_for_del_stmt(c, n);
3274 case pass_stmt:
3275 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3276 case flow_stmt:
3277 return ast_for_flow_stmt(c, n);
3278 case import_stmt:
3279 return ast_for_import_stmt(c, n);
3280 case global_stmt:
3281 return ast_for_global_stmt(c, n);
3282 case exec_stmt:
3283 return ast_for_exec_stmt(c, n);
3284 case assert_stmt:
3285 return ast_for_assert_stmt(c, n);
3286 default:
3287 PyErr_Format(PyExc_SystemError,
3288 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3289 TYPE(n), NCH(n));
3290 return NULL;
3291 }
3292 }
3293 else {
3294 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3295 | funcdef | classdef | decorated
3296 */
3297 node *ch = CHILD(n, 0);
3298 REQ(n, compound_stmt);
3299 switch (TYPE(ch)) {
3300 case if_stmt:
3301 return ast_for_if_stmt(c, ch);
3302 case while_stmt:
3303 return ast_for_while_stmt(c, ch);
3304 case for_stmt:
3305 return ast_for_for_stmt(c, ch);
3306 case try_stmt:
3307 return ast_for_try_stmt(c, ch);
3308 case with_stmt:
3309 return ast_for_with_stmt(c, ch);
3310 case funcdef:
3311 return ast_for_funcdef(c, ch, NULL);
3312 case classdef:
3313 return ast_for_classdef(c, ch, NULL);
3314 case decorated:
3315 return ast_for_decorated(c, ch);
3316 default:
3317 PyErr_Format(PyExc_SystemError,
3318 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3319 TYPE(n), NCH(n));
3320 return NULL;
3321 }
3322 }
3323 }
3324
3325 static PyObject *
parsenumber(struct compiling * c,const char * s)3326 parsenumber(struct compiling *c, const char *s)
3327 {
3328 const char *end;
3329 long x;
3330 double dx;
3331 #ifndef WITHOUT_COMPLEX
3332 Py_complex complex;
3333 int imflag;
3334 #endif
3335
3336 assert(s != NULL);
3337 errno = 0;
3338 end = s + strlen(s) - 1;
3339 #ifndef WITHOUT_COMPLEX
3340 imflag = *end == 'j' || *end == 'J';
3341 #endif
3342 if (*end == 'l' || *end == 'L')
3343 return PyLong_FromString((char *)s, (char **)0, 0);
3344 x = PyOS_strtol((char *)s, (char **)&end, 0);
3345 if (*end == '\0') {
3346 if (errno != 0)
3347 return PyLong_FromString((char *)s, (char **)0, 0);
3348 return PyInt_FromLong(x);
3349 }
3350 /* XXX Huge floats may silently fail */
3351 #ifndef WITHOUT_COMPLEX
3352 if (imflag) {
3353 complex.real = 0.;
3354 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3355 if (complex.imag == -1.0 && PyErr_Occurred())
3356 return NULL;
3357 return PyComplex_FromCComplex(complex);
3358 }
3359 else
3360 #endif
3361 {
3362 dx = PyOS_string_to_double(s, NULL, NULL);
3363 if (dx == -1.0 && PyErr_Occurred())
3364 return NULL;
3365 return PyFloat_FromDouble(dx);
3366 }
3367 }
3368
3369 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end,char * encoding)3370 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3371 {
3372 #ifndef Py_USING_UNICODE
3373 Py_FatalError("decode_utf8 should not be called in this build.");
3374 return NULL;
3375 #else
3376 PyObject *u, *v;
3377 char *s, *t;
3378 t = s = (char *)*sPtr;
3379 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3380 while (s < end && (*s & 0x80)) s++;
3381 *sPtr = s;
3382 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3383 if (u == NULL)
3384 return NULL;
3385 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3386 Py_DECREF(u);
3387 return v;
3388 #endif
3389 }
3390
#ifdef Py_USING_UNICODE
/* Decode the body of a unicode literal into a unicode object.

   `s`/`len` delimit the literal's contents.  `rawmode` selects
   raw-unicode-escape decoding instead of unicode-escape decoding.

   When `encoding` is given and is not "iso-8859-1", the input is first
   rewritten into a temporary buffer: every run of bytes with the high bit
   set is decoded through decode_utf8() and replaced by \UXXXXXXXX
   escapes, and a backslash directly followed by such a byte is spelled
   out as \u005c.  The rewritten buffer is then decoded in place of the
   original input.

   Returns NULL on failure.  An input too long for the temporary buffer
   size to be computed without overflow is reported as a MemoryError. */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
    PyObject *v;
    PyObject *u = NULL;
    char *buf;
    char *p;
    const char *end;
    if (encoding != NULL && strcmp(encoding, "iso-8859-1")) {
        /* check for integer overflow; make sure an exception is set so
           the caller does not see a bare NULL */
        if (len > PY_SIZE_MAX / 6)
            return PyErr_NoMemory();
        /* Worst-case growth of the rewritten buffer:
           a 2-byte UTF-8 character such as U+00E4 may become
           "\U000000E4" (10 bytes), or 1:5;
           a backslash followed by it (3 bytes) may become
           "\u005c\U000000E4" (16 bytes), or ~1:6 */
        u = PyString_FromStringAndSize((char *)NULL, len * 6);
        if (u == NULL)
            return NULL;
        p = buf = PyString_AsString(u);
        end = s + len;
        while (s < end) {
            if (*s == '\\') {
                *p++ = *s++;
                if (*s & 0x80) {
                    /* complete the backslash just copied to "\u005c" */
                    strcpy(p, "u005c");
                    p += 5;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                char *r;
                Py_ssize_t rn, i;
                w = decode_utf8(c, &s, end, "utf-32-be");
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                r = PyString_AsString(w);
                rn = PyString_Size(w);
                assert(rn % 4 == 0);
                /* one \UXXXXXXXX escape per 4-byte UTF-32 unit */
                for (i = 0; i < rn; i += 4) {
                    sprintf(p, "\\U%02x%02x%02x%02x",
                            r[i + 0] & 0xFF,
                            r[i + 1] & 0xFF,
                            r[i + 2] & 0xFF,
                            r[i + 3] & 0xFF);
                    p += 10;
                }
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
        /* from here on decode the rewritten buffer */
        len = p - buf;
        s = buf;
    }
    if (rawmode)
        v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
    else
        v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    Py_XDECREF(u);
    return v;
}
#endif
3455
3456 /* s is a Python string literal, including the bracketing quote characters,
3457 * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3458 * parsestr parses it, and returns the decoded Python string object.
3459 */
3460 static PyObject *
parsestr(struct compiling * c,const node * n,const char * s)3461 parsestr(struct compiling *c, const node *n, const char *s)
3462 {
3463 size_t len, i;
3464 int quote = Py_CHARMASK(*s);
3465 int rawmode = 0;
3466 int need_encoding;
3467 int unicode = c->c_future_unicode;
3468 int bytes = 0;
3469
3470 if (isalpha(quote) || quote == '_') {
3471 if (quote == 'u' || quote == 'U') {
3472 quote = *++s;
3473 unicode = 1;
3474 }
3475 if (quote == 'b' || quote == 'B') {
3476 quote = *++s;
3477 unicode = 0;
3478 bytes = 1;
3479 }
3480 if (quote == 'r' || quote == 'R') {
3481 quote = *++s;
3482 rawmode = 1;
3483 }
3484 }
3485 if (quote != '\'' && quote != '\"') {
3486 PyErr_BadInternalCall();
3487 return NULL;
3488 }
3489 s++;
3490 len = strlen(s);
3491 if (len > INT_MAX) {
3492 PyErr_SetString(PyExc_OverflowError,
3493 "string to parse is too long");
3494 return NULL;
3495 }
3496 if (s[--len] != quote) {
3497 PyErr_BadInternalCall();
3498 return NULL;
3499 }
3500 if (len >= 4 && s[0] == quote && s[1] == quote) {
3501 s += 2;
3502 len -= 2;
3503 if (s[--len] != quote || s[--len] != quote) {
3504 PyErr_BadInternalCall();
3505 return NULL;
3506 }
3507 }
3508 if (Py_Py3kWarningFlag && bytes) {
3509 for (i = 0; i < len; i++) {
3510 if ((unsigned char)s[i] > 127) {
3511 if (!ast_warn(c, n,
3512 "non-ascii bytes literals not supported in 3.x"))
3513 return NULL;
3514 break;
3515 }
3516 }
3517 }
3518 #ifdef Py_USING_UNICODE
3519 if (unicode || Py_UnicodeFlag) {
3520 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3521 }
3522 #endif
3523 need_encoding = (c->c_encoding != NULL &&
3524 strcmp(c->c_encoding, "utf-8") != 0 &&
3525 strcmp(c->c_encoding, "iso-8859-1") != 0);
3526 if (rawmode || strchr(s, '\\') == NULL) {
3527 if (need_encoding) {
3528 #ifndef Py_USING_UNICODE
3529 /* This should not happen - we never see any other
3530 encoding. */
3531 Py_FatalError(
3532 "cannot deal with encodings in this build.");
3533 #else
3534 PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3535 if (u == NULL)
3536 return NULL;
3537 v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3538 Py_DECREF(u);
3539 return v;
3540 #endif
3541 } else {
3542 return PyString_FromStringAndSize(s, len);
3543 }
3544 }
3545
3546 return PyString_DecodeEscape(s, len, NULL, unicode,
3547 need_encoding ? c->c_encoding : NULL);
3548 }
3549
3550 /* Build a Python string object out of a STRING atom. This takes care of
3551 * compile-time literal catenation, calling parsestr() on each piece, and
3552 * pasting the intermediate results together.
3553 */
3554 static PyObject *
parsestrplus(struct compiling * c,const node * n)3555 parsestrplus(struct compiling *c, const node *n)
3556 {
3557 PyObject *v;
3558 int i;
3559 REQ(CHILD(n, 0), STRING);
3560 if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3561 /* String literal concatenation */
3562 for (i = 1; i < NCH(n); i++) {
3563 PyObject *s;
3564 s = parsestr(c, n, STR(CHILD(n, i)));
3565 if (s == NULL)
3566 goto onError;
3567 if (PyString_Check(v) && PyString_Check(s)) {
3568 PyString_ConcatAndDel(&v, s);
3569 if (v == NULL)
3570 goto onError;
3571 }
3572 #ifdef Py_USING_UNICODE
3573 else {
3574 PyObject *temp = PyUnicode_Concat(v, s);
3575 Py_DECREF(s);
3576 Py_DECREF(v);
3577 v = temp;
3578 if (v == NULL)
3579 goto onError;
3580 }
3581 #endif
3582 }
3583 }
3584 return v;
3585
3586 onError:
3587 Py_XDECREF(v);
3588 return NULL;
3589 }
3590