1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST).  The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
15 
16 #include <assert.h>
17 
/* Data structure used internally: the per-conversion state that is passed
   to the static helpers in this file. */
struct compiling {
    char *c_encoding; /* source encoding */
    int c_future_unicode; /* __future__ unicode literals flag */
    PyArena *c_arena; /* arena for allocating memory */
    const char *c_filename; /* filename */
};
25 
/* Forward declarations of file-local helpers so that they can be called
   before their definitions appear. */
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
static expr_ty ast_for_expr(struct compiling *, const node *);
static stmt_ty ast_for_stmt(struct compiling *, const node *);
static asdl_seq *ast_for_suite(struct compiling *, const node *);
static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
                                  expr_context_ty);
static expr_ty ast_for_testlist(struct compiling *, const node *);
static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
static expr_ty ast_for_testlist_comp(struct compiling *, const node *);

/* Note different signature for ast_for_call: it takes an extra expr_ty
   argument in addition to the usual (compiling, node) pair. */
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);

static PyObject *parsenumber(struct compiling *, const char *);
static PyObject *parsestr(struct compiling *, const node *n, const char *);
static PyObject *parsestrplus(struct compiling *, const node *n);
42 
/* Line number recorded on a CST node; only defined here if no earlier
   header already provides LINENO. */
#ifndef LINENO
#define LINENO(n)       ((n)->n_lineno)
#endif

/* NOTE(review): presumably selectors for the kind of comprehension being
   built; they are not used in the visible part of the file -- confirm at
   the use sites. */
#define COMP_GENEXP 0
#define COMP_SETCOMP  1
50 static identifier
new_identifier(const char * n,PyArena * arena)51 new_identifier(const char* n, PyArena *arena) {
52     PyObject* id = PyString_InternFromString(n);
53     if (id != NULL)
54         PyArena_AddPyObject(arena, id);
55     return id;
56 }
57 
58 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
59 
60 /* This routine provides an invalid object for the syntax error.
61    The outermost routine must unpack this error and create the
62    proper object.  We do this so that we don't have to pass
63    the filename to everything function.
64 
65    XXX Maybe we should just pass the filename...
66 */
67 
68 static int
ast_error(const node * n,const char * errstr)69 ast_error(const node *n, const char *errstr)
70 {
71     PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
72     if (!u)
73         return 0;
74     PyErr_SetObject(PyExc_SyntaxError, u);
75     Py_DECREF(u);
76     return 0;
77 }
78 
79 static void
ast_error_finish(const char * filename)80 ast_error_finish(const char *filename)
81 {
82     PyObject *type, *value, *tback, *errstr, *loc, *tmp;
83     long lineno;
84 
85     assert(PyErr_Occurred());
86     if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
87         return;
88 
89     PyErr_Fetch(&type, &value, &tback);
90     errstr = PyTuple_GetItem(value, 0);
91     if (!errstr)
92         return;
93     Py_INCREF(errstr);
94     lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
95     if (lineno == -1) {
96         Py_DECREF(errstr);
97         return;
98     }
99     Py_DECREF(value);
100 
101     loc = PyErr_ProgramText(filename, lineno);
102     if (!loc) {
103         Py_INCREF(Py_None);
104         loc = Py_None;
105     }
106     tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
107     Py_DECREF(loc);
108     if (!tmp) {
109         Py_DECREF(errstr);
110         return;
111     }
112     value = PyTuple_Pack(2, errstr, tmp);
113     Py_DECREF(errstr);
114     Py_DECREF(tmp);
115     if (!value)
116         return;
117     PyErr_Restore(type, value, tback);
118 }
119 
120 static int
ast_warn(struct compiling * c,const node * n,char * msg)121 ast_warn(struct compiling *c, const node *n, char *msg)
122 {
123     if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
124                            NULL, NULL) < 0) {
125         /* if -Werr, change it to a SyntaxError */
126         if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
127             ast_error(n, msg);
128         return 0;
129     }
130     return 1;
131 }
132 
133 static int
forbidden_check(struct compiling * c,const node * n,const char * x)134 forbidden_check(struct compiling *c, const node *n, const char *x)
135 {
136     if (!strcmp(x, "None"))
137         return ast_error(n, "cannot assign to None");
138     if (!strcmp(x, "__debug__"))
139         return ast_error(n, "cannot assign to __debug__");
140     if (Py_Py3kWarningFlag) {
141         if (!(strcmp(x, "True") && strcmp(x, "False")) &&
142             !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
143             return 0;
144         if (!strcmp(x, "nonlocal") &&
145             !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
146             return 0;
147     }
148     return 1;
149 }
150 
151 /* num_stmts() returns number of contained statements.
152 
153    Use this routine to determine how big a sequence is needed for
154    the statements in a parse tree.  Its raison d'etre is this bit of
155    grammar:
156 
157    stmt: simple_stmt | compound_stmt
158    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
159 
160    A simple_stmt can contain multiple small_stmt elements joined
161    by semicolons.  If the arg is a simple_stmt, the number of
162    small_stmt elements is returned.
163 */
164 
165 static int
num_stmts(const node * n)166 num_stmts(const node *n)
167 {
168     int i, l;
169     node *ch;
170 
171     switch (TYPE(n)) {
172         case single_input:
173             if (TYPE(CHILD(n, 0)) == NEWLINE)
174                 return 0;
175             else
176                 return num_stmts(CHILD(n, 0));
177         case file_input:
178             l = 0;
179             for (i = 0; i < NCH(n); i++) {
180                 ch = CHILD(n, i);
181                 if (TYPE(ch) == stmt)
182                     l += num_stmts(ch);
183             }
184             return l;
185         case stmt:
186             return num_stmts(CHILD(n, 0));
187         case compound_stmt:
188             return 1;
189         case simple_stmt:
190             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
191         case suite:
192             if (NCH(n) == 1)
193                 return num_stmts(CHILD(n, 0));
194             else {
195                 l = 0;
196                 for (i = 2; i < (NCH(n) - 1); i++)
197                     l += num_stmts(CHILD(n, i));
198                 return l;
199             }
200         default: {
201             char buf[128];
202 
203             sprintf(buf, "Non-statement found: %d %d",
204                     TYPE(n), NCH(n));
205             Py_FatalError(buf);
206         }
207     }
208     assert(0);
209     return 0;
210 }
211 
212 /* Transform the CST rooted at node * to the appropriate AST
213 */
214 
215 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)216 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
217                PyArena *arena)
218 {
219     int i, j, k, num;
220     asdl_seq *stmts = NULL;
221     stmt_ty s;
222     node *ch;
223     struct compiling c;
224 
225     if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
226         c.c_encoding = "utf-8";
227         if (TYPE(n) == encoding_decl) {
228             ast_error(n, "encoding declaration in Unicode string");
229             goto error;
230         }
231     } else if (TYPE(n) == encoding_decl) {
232         c.c_encoding = STR(n);
233         n = CHILD(n, 0);
234     } else {
235         c.c_encoding = NULL;
236     }
237     c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
238     c.c_arena = arena;
239     c.c_filename = filename;
240 
241     k = 0;
242     switch (TYPE(n)) {
243         case file_input:
244             stmts = asdl_seq_new(num_stmts(n), arena);
245             if (!stmts)
246                 return NULL;
247             for (i = 0; i < NCH(n) - 1; i++) {
248                 ch = CHILD(n, i);
249                 if (TYPE(ch) == NEWLINE)
250                     continue;
251                 REQ(ch, stmt);
252                 num = num_stmts(ch);
253                 if (num == 1) {
254                     s = ast_for_stmt(&c, ch);
255                     if (!s)
256                         goto error;
257                     asdl_seq_SET(stmts, k++, s);
258                 }
259                 else {
260                     ch = CHILD(ch, 0);
261                     REQ(ch, simple_stmt);
262                     for (j = 0; j < num; j++) {
263                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
264                         if (!s)
265                             goto error;
266                         asdl_seq_SET(stmts, k++, s);
267                     }
268                 }
269             }
270             return Module(stmts, arena);
271         case eval_input: {
272             expr_ty testlist_ast;
273 
274             /* XXX Why not comp_for here? */
275             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
276             if (!testlist_ast)
277                 goto error;
278             return Expression(testlist_ast, arena);
279         }
280         case single_input:
281             if (TYPE(CHILD(n, 0)) == NEWLINE) {
282                 stmts = asdl_seq_new(1, arena);
283                 if (!stmts)
284                     goto error;
285                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
286                                             arena));
287                 if (!asdl_seq_GET(stmts, 0))
288                     goto error;
289                 return Interactive(stmts, arena);
290             }
291             else {
292                 n = CHILD(n, 0);
293                 num = num_stmts(n);
294                 stmts = asdl_seq_new(num, arena);
295                 if (!stmts)
296                     goto error;
297                 if (num == 1) {
298                     s = ast_for_stmt(&c, n);
299                     if (!s)
300                         goto error;
301                     asdl_seq_SET(stmts, 0, s);
302                 }
303                 else {
304                     /* Only a simple_stmt can contain multiple statements. */
305                     REQ(n, simple_stmt);
306                     for (i = 0; i < NCH(n); i += 2) {
307                         if (TYPE(CHILD(n, i)) == NEWLINE)
308                             break;
309                         s = ast_for_stmt(&c, CHILD(n, i));
310                         if (!s)
311                             goto error;
312                         asdl_seq_SET(stmts, i / 2, s);
313                     }
314                 }
315 
316                 return Interactive(stmts, arena);
317             }
318         default:
319             PyErr_Format(PyExc_SystemError,
320                          "invalid node %d for PyAST_FromNode", TYPE(n));
321             goto error;
322     }
323  error:
324     ast_error_finish(filename);
325     return NULL;
326 }
327 
328 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
329 */
330 
331 static operator_ty
get_operator(const node * n)332 get_operator(const node *n)
333 {
334     switch (TYPE(n)) {
335         case VBAR:
336             return BitOr;
337         case CIRCUMFLEX:
338             return BitXor;
339         case AMPER:
340             return BitAnd;
341         case LEFTSHIFT:
342             return LShift;
343         case RIGHTSHIFT:
344             return RShift;
345         case PLUS:
346             return Add;
347         case MINUS:
348             return Sub;
349         case STAR:
350             return Mult;
351         case SLASH:
352             return Div;
353         case DOUBLESLASH:
354             return FloorDiv;
355         case PERCENT:
356             return Mod;
357         default:
358             return (operator_ty)0;
359     }
360 }
361 
362 /* Set the context ctx for expr_ty e, recursively traversing e.
363 
364    Only sets context for expr kinds that "can appear in assignment context"
365    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
366    an appropriate syntax error and returns false.
367 */
368 
369 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)370 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
371 {
372     asdl_seq *s = NULL;
373     /* If a particular expression type can't be used for assign / delete,
374        set expr_name to its name and an error message will be generated.
375     */
376     const char* expr_name = NULL;
377 
378     /* The ast defines augmented store and load contexts, but the
379        implementation here doesn't actually use them.  The code may be
380        a little more complex than necessary as a result.  It also means
381        that expressions in an augmented assignment have a Store context.
382        Consider restructuring so that augmented assignment uses
383        set_context(), too.
384     */
385     assert(ctx != AugStore && ctx != AugLoad);
386 
387     switch (e->kind) {
388         case Attribute_kind:
389             if (ctx == Store && !forbidden_check(c, n,
390                                 PyBytes_AS_STRING(e->v.Attribute.attr)))
391                     return 0;
392             e->v.Attribute.ctx = ctx;
393             break;
394         case Subscript_kind:
395             e->v.Subscript.ctx = ctx;
396             break;
397         case Name_kind:
398             if (ctx == Store && !forbidden_check(c, n,
399                                 PyBytes_AS_STRING(e->v.Name.id)))
400                     return 0;
401             e->v.Name.ctx = ctx;
402             break;
403         case List_kind:
404             e->v.List.ctx = ctx;
405             s = e->v.List.elts;
406             break;
407         case Tuple_kind:
408             if (asdl_seq_LEN(e->v.Tuple.elts))  {
409                 e->v.Tuple.ctx = ctx;
410                 s = e->v.Tuple.elts;
411             }
412             else {
413                 expr_name = "()";
414             }
415             break;
416         case Lambda_kind:
417             expr_name = "lambda";
418             break;
419         case Call_kind:
420             expr_name = "function call";
421             break;
422         case BoolOp_kind:
423         case BinOp_kind:
424         case UnaryOp_kind:
425             expr_name = "operator";
426             break;
427         case GeneratorExp_kind:
428             expr_name = "generator expression";
429             break;
430         case Yield_kind:
431             expr_name = "yield expression";
432             break;
433         case ListComp_kind:
434             expr_name = "list comprehension";
435             break;
436         case SetComp_kind:
437             expr_name = "set comprehension";
438             break;
439         case DictComp_kind:
440             expr_name = "dict comprehension";
441             break;
442         case Dict_kind:
443         case Set_kind:
444         case Num_kind:
445         case Str_kind:
446             expr_name = "literal";
447             break;
448         case Compare_kind:
449             expr_name = "comparison";
450             break;
451         case Repr_kind:
452             expr_name = "repr";
453             break;
454         case IfExp_kind:
455             expr_name = "conditional expression";
456             break;
457         default:
458             PyErr_Format(PyExc_SystemError,
459                          "unexpected expression in assignment %d (line %d)",
460                          e->kind, e->lineno);
461             return 0;
462     }
463     /* Check for error string set by switch */
464     if (expr_name) {
465         char buf[300];
466         PyOS_snprintf(buf, sizeof(buf),
467                       "can't %s %s",
468                       ctx == Store ? "assign to" : "delete",
469                       expr_name);
470         return ast_error(n, buf);
471     }
472 
473     /* If the LHS is a list or tuple, we need to set the assignment
474        context for all the contained elements.
475     */
476     if (s) {
477         int i;
478 
479         for (i = 0; i < asdl_seq_LEN(s); i++) {
480             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
481                 return 0;
482         }
483     }
484     return 1;
485 }
486 
487 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)488 ast_for_augassign(struct compiling *c, const node *n)
489 {
490     REQ(n, augassign);
491     n = CHILD(n, 0);
492     switch (STR(n)[0]) {
493         case '+':
494             return Add;
495         case '-':
496             return Sub;
497         case '/':
498             if (STR(n)[1] == '/')
499                 return FloorDiv;
500             else
501                 return Div;
502         case '%':
503             return Mod;
504         case '<':
505             return LShift;
506         case '>':
507             return RShift;
508         case '&':
509             return BitAnd;
510         case '^':
511             return BitXor;
512         case '|':
513             return BitOr;
514         case '*':
515             if (STR(n)[1] == '*')
516                 return Pow;
517             else
518                 return Mult;
519         default:
520             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
521             return (operator_ty)0;
522     }
523 }
524 
525 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)526 ast_for_comp_op(struct compiling *c, const node *n)
527 {
528     /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
529                |'is' 'not'
530     */
531     REQ(n, comp_op);
532     if (NCH(n) == 1) {
533         n = CHILD(n, 0);
534         switch (TYPE(n)) {
535             case LESS:
536                 return Lt;
537             case GREATER:
538                 return Gt;
539             case EQEQUAL:                       /* == */
540                 return Eq;
541             case LESSEQUAL:
542                 return LtE;
543             case GREATEREQUAL:
544                 return GtE;
545             case NOTEQUAL:
546                 return NotEq;
547             case NAME:
548                 if (strcmp(STR(n), "in") == 0)
549                     return In;
550                 if (strcmp(STR(n), "is") == 0)
551                     return Is;
552             default:
553                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
554                              STR(n));
555                 return (cmpop_ty)0;
556         }
557     }
558     else if (NCH(n) == 2) {
559         /* handle "not in" and "is not" */
560         switch (TYPE(CHILD(n, 0))) {
561             case NAME:
562                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
563                     return NotIn;
564                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
565                     return IsNot;
566             default:
567                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
568                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
569                 return (cmpop_ty)0;
570         }
571     }
572     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
573                  NCH(n));
574     return (cmpop_ty)0;
575 }
576 
577 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)578 seq_for_testlist(struct compiling *c, const node *n)
579 {
580     /* testlist: test (',' test)* [','] */
581     asdl_seq *seq;
582     expr_ty expression;
583     int i;
584     assert(TYPE(n) == testlist ||
585            TYPE(n) == listmaker ||
586            TYPE(n) == testlist_comp ||
587            TYPE(n) == testlist_safe ||
588            TYPE(n) == testlist1);
589 
590     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
591     if (!seq)
592         return NULL;
593 
594     for (i = 0; i < NCH(n); i += 2) {
595         assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
596 
597         expression = ast_for_expr(c, CHILD(n, i));
598         if (!expression)
599             return NULL;
600 
601         assert(i / 2 < seq->size);
602         asdl_seq_SET(seq, i / 2, expression);
603     }
604     return seq;
605 }
606 
607 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)608 compiler_complex_args(struct compiling *c, const node *n)
609 {
610     int i, len = (NCH(n) + 1) / 2;
611     expr_ty result;
612     asdl_seq *args = asdl_seq_new(len, c->c_arena);
613     if (!args)
614         return NULL;
615 
616     /* fpdef: NAME | '(' fplist ')'
617        fplist: fpdef (',' fpdef)* [',']
618     */
619     REQ(n, fplist);
620     for (i = 0; i < len; i++) {
621         PyObject *arg_id;
622         const node *fpdef_node = CHILD(n, 2*i);
623         const node *child;
624         expr_ty arg;
625 set_name:
626         /* fpdef_node is either a NAME or an fplist */
627         child = CHILD(fpdef_node, 0);
628         if (TYPE(child) == NAME) {
629             if (!forbidden_check(c, n, STR(child)))
630                 return NULL;
631             arg_id = NEW_IDENTIFIER(child);
632             if (!arg_id)
633                 return NULL;
634             arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
635                        c->c_arena);
636         }
637         else {
638             assert(TYPE(fpdef_node) == fpdef);
639             /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
640             child = CHILD(fpdef_node, 1);
641             assert(TYPE(child) == fplist);
642             /* NCH == 1 means we have (x), we need to elide the extra parens */
643             if (NCH(child) == 1) {
644                 fpdef_node = CHILD(child, 0);
645                 assert(TYPE(fpdef_node) == fpdef);
646                 goto set_name;
647             }
648             arg = compiler_complex_args(c, child);
649         }
650         asdl_seq_SET(args, i, arg);
651     }
652 
653     result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
654     if (!set_context(c, result, Store, n))
655         return NULL;
656     return result;
657 }
658 
659 
/* Create AST for argument list.

   n is either a parameters node or a varargslist node.  Returns the
   arguments node built from the positional parameters, their default
   values and the optional *name / **name parameters, or NULL on error.
*/

static arguments_ty
ast_for_arguments(struct compiling *c, const node *n)
{
    /* parameters: '(' [varargslist] ')'
       varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
            | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
    */
    int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
    asdl_seq *args, *defaults;
    identifier vararg = NULL, kwarg = NULL;
    node *ch;

    if (TYPE(n) == parameters) {
        if (NCH(n) == 2) /* () as argument list */
            return arguments(NULL, NULL, NULL, NULL, c->c_arena);
        /* Skip the opening parenthesis; the varargslist is child 1. */
        n = CHILD(n, 1);
    }
    REQ(n, varargslist);

    /* first count the number of normal args & defaults */
    for (i = 0; i < NCH(n); i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == fpdef)
            n_args++;
        if (TYPE(ch) == EQUAL)
            n_defaults++;
    }
    /* A sequence is only allocated when there is something to put in it;
       otherwise the corresponding pointer stays NULL. */
    args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
    if (!args && n_args)
        return NULL;
    defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
    if (!defaults && n_defaults)
        return NULL;

    /* fpdef: NAME | '(' fplist ')'
       fplist: fpdef (',' fpdef)* [',']
    */
    i = 0;  /* index of the current child of n */
    j = 0;  /* index for defaults */
    k = 0;  /* index for args */
    while (i < NCH(n)) {
        ch = CHILD(n, i);
        switch (TYPE(ch)) {
            case fpdef: {
                int complex_args = 0, parenthesized = 0;
            handle_fpdef:
                /* This label is re-entered (with ch replaced by the inner
                   fpdef) each time a layer of redundant parentheses is
                   peeled off below. */
                /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
                   anything other than EQUAL or a comma? */
                /* XXX Should NCH(n) check be made a separate check? */
                if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
                    expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
                    if (!expression)
                        return NULL;
                    assert(defaults != NULL);
                    asdl_seq_SET(defaults, j++, expression);
                    /* Step over the '=' and the default expression. */
                    i += 2;
                    found_default = 1;
                }
                else if (found_default) {
                    /* def f((x)=4): pass should raise an error.
                       def f((x, (y))): pass will just incur the tuple unpacking warning. */
                    if (parenthesized && !complex_args) {
                        ast_error(n, "parenthesized arg with default");
                        return NULL;
                    }
                    ast_error(n,
                             "non-default argument follows default argument");
                    return NULL;
                }
                /* Three children means the fpdef is '(' fplist ')'. */
                if (NCH(ch) == 3) {
                    ch = CHILD(ch, 1);
                    /* def foo((x)): is not complex, special case. */
                    if (NCH(ch) != 1) {
                        /* We have complex arguments, setup for unpacking. */
                        if (Py_Py3kWarningFlag && !ast_warn(c, ch,
                            "tuple parameter unpacking has been removed in 3.x"))
                            return NULL;
                        complex_args = 1;
                        asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
                        /* The slot just written holds NULL if building the
                           tuple failed. */
                        if (!asdl_seq_GET(args, k-1))
                                return NULL;
                    } else {
                        /* def foo((x)): setup for checking NAME below. */
                        /* Loop because there can be many parens and tuple
                           unpacking mixed in. */
                        parenthesized = 1;
                        ch = CHILD(ch, 0);
                        assert(TYPE(ch) == fpdef);
                        goto handle_fpdef;
                    }
                }
                /* Plain name parameter: becomes a Name node in Param
                   context. */
                if (TYPE(CHILD(ch, 0)) == NAME) {
                    PyObject *id;
                    expr_ty name;
                    if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
                        return NULL;
                    id = NEW_IDENTIFIER(CHILD(ch, 0));
                    if (!id)
                        return NULL;
                    name = Name(id, Param, LINENO(ch), ch->n_col_offset,
                                c->c_arena);
                    if (!name)
                        return NULL;
                    asdl_seq_SET(args, k++, name);

                }
                i += 2; /* the name and the comma */
                if (parenthesized && Py_Py3kWarningFlag &&
                    !ast_warn(c, ch, "parenthesized argument names "
                              "are invalid in 3.x"))
                    return NULL;

                break;
            }
            case STAR:
                /* '*' NAME: the name is the child following the star. */
                if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
                    return NULL;
                vararg = NEW_IDENTIFIER(CHILD(n, i+1));
                if (!vararg)
                    return NULL;
                /* Advance past the star, the name and one more child. */
                i += 3;
                break;
            case DOUBLESTAR:
                /* '**' NAME: the name is the child following the stars. */
                if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
                    return NULL;
                kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
                if (!kwarg)
                    return NULL;
                i += 3;
                break;
            default:
                PyErr_Format(PyExc_SystemError,
                             "unexpected node in varargslist: %d @ %d",
                             TYPE(ch), i);
                return NULL;
        }
    }

    return arguments(args, vararg, kwarg, defaults, c->c_arena);
}
802 
803 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)804 ast_for_dotted_name(struct compiling *c, const node *n)
805 {
806     expr_ty e;
807     identifier id;
808     int lineno, col_offset;
809     int i;
810 
811     REQ(n, dotted_name);
812 
813     lineno = LINENO(n);
814     col_offset = n->n_col_offset;
815 
816     id = NEW_IDENTIFIER(CHILD(n, 0));
817     if (!id)
818         return NULL;
819     e = Name(id, Load, lineno, col_offset, c->c_arena);
820     if (!e)
821         return NULL;
822 
823     for (i = 2; i < NCH(n); i+=2) {
824         id = NEW_IDENTIFIER(CHILD(n, i));
825         if (!id)
826             return NULL;
827         e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
828         if (!e)
829             return NULL;
830     }
831 
832     return e;
833 }
834 
835 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)836 ast_for_decorator(struct compiling *c, const node *n)
837 {
838     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
839     expr_ty d = NULL;
840     expr_ty name_expr;
841 
842     REQ(n, decorator);
843     REQ(CHILD(n, 0), AT);
844     REQ(RCHILD(n, -1), NEWLINE);
845 
846     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
847     if (!name_expr)
848         return NULL;
849 
850     if (NCH(n) == 3) { /* No arguments */
851         d = name_expr;
852         name_expr = NULL;
853     }
854     else if (NCH(n) == 5) { /* Call with no arguments */
855         d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
856                  n->n_col_offset, c->c_arena);
857         if (!d)
858             return NULL;
859         name_expr = NULL;
860     }
861     else {
862         d = ast_for_call(c, CHILD(n, 3), name_expr);
863         if (!d)
864             return NULL;
865         name_expr = NULL;
866     }
867 
868     return d;
869 }
870 
871 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)872 ast_for_decorators(struct compiling *c, const node *n)
873 {
874     asdl_seq* decorator_seq;
875     expr_ty d;
876     int i;
877 
878     REQ(n, decorators);
879     decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
880     if (!decorator_seq)
881         return NULL;
882 
883     for (i = 0; i < NCH(n); i++) {
884         d = ast_for_decorator(c, CHILD(n, i));
885         if (!d)
886             return NULL;
887         asdl_seq_SET(decorator_seq, i, d);
888     }
889     return decorator_seq;
890 }
891 
892 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)893 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
894 {
895     /* funcdef: 'def' NAME parameters ':' suite */
896     identifier name;
897     arguments_ty args;
898     asdl_seq *body;
899     int name_i = 1;
900 
901     REQ(n, funcdef);
902 
903     name = NEW_IDENTIFIER(CHILD(n, name_i));
904     if (!name)
905         return NULL;
906     else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
907         return NULL;
908     args = ast_for_arguments(c, CHILD(n, name_i + 1));
909     if (!args)
910         return NULL;
911     body = ast_for_suite(c, CHILD(n, name_i + 3));
912     if (!body)
913         return NULL;
914 
915     return FunctionDef(name, args, body, decorator_seq, LINENO(n),
916                        n->n_col_offset, c->c_arena);
917 }
918 
919 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)920 ast_for_decorated(struct compiling *c, const node *n)
921 {
922     /* decorated: decorators (classdef | funcdef) */
923     stmt_ty thing = NULL;
924     asdl_seq *decorator_seq = NULL;
925 
926     REQ(n, decorated);
927 
928     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
929     if (!decorator_seq)
930       return NULL;
931 
932     assert(TYPE(CHILD(n, 1)) == funcdef ||
933            TYPE(CHILD(n, 1)) == classdef);
934 
935     if (TYPE(CHILD(n, 1)) == funcdef) {
936       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
937     } else if (TYPE(CHILD(n, 1)) == classdef) {
938       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
939     }
940     /* we count the decorators in when talking about the class' or
941        function's line number */
942     if (thing) {
943         thing->lineno = LINENO(n);
944         thing->col_offset = n->n_col_offset;
945     }
946     return thing;
947 }
948 
949 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)950 ast_for_lambdef(struct compiling *c, const node *n)
951 {
952     /* lambdef: 'lambda' [varargslist] ':' test */
953     arguments_ty args;
954     expr_ty expression;
955 
956     if (NCH(n) == 3) {
957         args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
958         if (!args)
959             return NULL;
960         expression = ast_for_expr(c, CHILD(n, 2));
961         if (!expression)
962             return NULL;
963     }
964     else {
965         args = ast_for_arguments(c, CHILD(n, 1));
966         if (!args)
967             return NULL;
968         expression = ast_for_expr(c, CHILD(n, 3));
969         if (!expression)
970             return NULL;
971     }
972 
973     return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
974 }
975 
976 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)977 ast_for_ifexpr(struct compiling *c, const node *n)
978 {
979     /* test: or_test 'if' or_test 'else' test */
980     expr_ty expression, body, orelse;
981 
982     assert(NCH(n) == 5);
983     body = ast_for_expr(c, CHILD(n, 0));
984     if (!body)
985         return NULL;
986     expression = ast_for_expr(c, CHILD(n, 2));
987     if (!expression)
988         return NULL;
989     orelse = ast_for_expr(c, CHILD(n, 4));
990     if (!orelse)
991         return NULL;
992     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
993                  c->c_arena);
994 }
995 
996 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
997    so there is only a single version.  Possibly for loops can also re-use
998    the code.
999 */
1000 
1001 /* Count the number of 'for' loop in a list comprehension.
1002 
1003    Helper for ast_for_listcomp().
1004 */
1005 
1006 static int
count_list_fors(struct compiling * c,const node * n)1007 count_list_fors(struct compiling *c, const node *n)
1008 {
1009     int n_fors = 0;
1010     node *ch = CHILD(n, 1);
1011 
1012  count_list_for:
1013     n_fors++;
1014     REQ(ch, list_for);
1015     if (NCH(ch) == 5)
1016         ch = CHILD(ch, 4);
1017     else
1018         return n_fors;
1019  count_list_iter:
1020     REQ(ch, list_iter);
1021     ch = CHILD(ch, 0);
1022     if (TYPE(ch) == list_for)
1023         goto count_list_for;
1024     else if (TYPE(ch) == list_if) {
1025         if (NCH(ch) == 3) {
1026             ch = CHILD(ch, 2);
1027             goto count_list_iter;
1028         }
1029         else
1030             return n_fors;
1031     }
1032 
1033     /* Should never be reached */
1034     PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1035     return -1;
1036 }
1037 
1038 /* Count the number of 'if' statements in a list comprehension.
1039 
1040    Helper for ast_for_listcomp().
1041 */
1042 
1043 static int
count_list_ifs(struct compiling * c,const node * n)1044 count_list_ifs(struct compiling *c, const node *n)
1045 {
1046     int n_ifs = 0;
1047 
1048  count_list_iter:
1049     REQ(n, list_iter);
1050     if (TYPE(CHILD(n, 0)) == list_for)
1051         return n_ifs;
1052     n = CHILD(n, 0);
1053     REQ(n, list_if);
1054     n_ifs++;
1055     if (NCH(n) == 2)
1056         return n_ifs;
1057     n = CHILD(n, 2);
1058     goto count_list_iter;
1059 }
1060 
1061 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1062 ast_for_listcomp(struct compiling *c, const node *n)
1063 {
1064     /* listmaker: test ( list_for | (',' test)* [','] )
1065        list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1066        list_iter: list_for | list_if
1067        list_if: 'if' test [list_iter]
1068        testlist_safe: test [(',' test)+ [',']]
1069     */
1070     expr_ty elt, first;
1071     asdl_seq *listcomps;
1072     int i, n_fors;
1073     node *ch;
1074 
1075     REQ(n, listmaker);
1076     assert(NCH(n) > 1);
1077 
1078     elt = ast_for_expr(c, CHILD(n, 0));
1079     if (!elt)
1080         return NULL;
1081 
1082     n_fors = count_list_fors(c, n);
1083     if (n_fors == -1)
1084         return NULL;
1085 
1086     listcomps = asdl_seq_new(n_fors, c->c_arena);
1087     if (!listcomps)
1088         return NULL;
1089 
1090     ch = CHILD(n, 1);
1091     for (i = 0; i < n_fors; i++) {
1092         comprehension_ty lc;
1093         asdl_seq *t;
1094         expr_ty expression;
1095         node *for_ch;
1096 
1097         REQ(ch, list_for);
1098 
1099         for_ch = CHILD(ch, 1);
1100         t = ast_for_exprlist(c, for_ch, Store);
1101         if (!t)
1102             return NULL;
1103         expression = ast_for_testlist(c, CHILD(ch, 3));
1104         if (!expression)
1105             return NULL;
1106 
1107         /* Check the # of children rather than the length of t, since
1108            [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1109         */
1110         first = (expr_ty)asdl_seq_GET(t, 0);
1111         if (NCH(for_ch) == 1)
1112             lc = comprehension(first, expression, NULL, c->c_arena);
1113         else
1114             lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1115                                      c->c_arena),
1116                                expression, NULL, c->c_arena);
1117         if (!lc)
1118             return NULL;
1119 
1120         if (NCH(ch) == 5) {
1121             int j, n_ifs;
1122             asdl_seq *ifs;
1123             expr_ty list_for_expr;
1124 
1125             ch = CHILD(ch, 4);
1126             n_ifs = count_list_ifs(c, ch);
1127             if (n_ifs == -1)
1128                 return NULL;
1129 
1130             ifs = asdl_seq_new(n_ifs, c->c_arena);
1131             if (!ifs)
1132                 return NULL;
1133 
1134             for (j = 0; j < n_ifs; j++) {
1135                 REQ(ch, list_iter);
1136                 ch = CHILD(ch, 0);
1137                 REQ(ch, list_if);
1138 
1139                 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1140                 if (!list_for_expr)
1141                     return NULL;
1142 
1143                 asdl_seq_SET(ifs, j, list_for_expr);
1144                 if (NCH(ch) == 3)
1145                     ch = CHILD(ch, 2);
1146             }
1147             /* on exit, must guarantee that ch is a list_for */
1148             if (TYPE(ch) == list_iter)
1149                 ch = CHILD(ch, 0);
1150             lc->ifs = ifs;
1151         }
1152         asdl_seq_SET(listcomps, i, lc);
1153     }
1154 
1155     return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1156 }
1157 
1158 /*
1159    Count the number of 'for' loops in a comprehension.
1160 
1161    Helper for ast_for_comprehension().
1162 */
1163 
1164 static int
count_comp_fors(struct compiling * c,const node * n)1165 count_comp_fors(struct compiling *c, const node *n)
1166 {
1167     int n_fors = 0;
1168 
1169   count_comp_for:
1170     n_fors++;
1171     REQ(n, comp_for);
1172     if (NCH(n) == 5)
1173         n = CHILD(n, 4);
1174     else
1175         return n_fors;
1176   count_comp_iter:
1177     REQ(n, comp_iter);
1178     n = CHILD(n, 0);
1179     if (TYPE(n) == comp_for)
1180         goto count_comp_for;
1181     else if (TYPE(n) == comp_if) {
1182         if (NCH(n) == 3) {
1183             n = CHILD(n, 2);
1184             goto count_comp_iter;
1185         }
1186         else
1187             return n_fors;
1188     }
1189 
1190     /* Should never be reached */
1191     PyErr_SetString(PyExc_SystemError,
1192                     "logic error in count_comp_fors");
1193     return -1;
1194 }
1195 
1196 /* Count the number of 'if' statements in a comprehension.
1197 
1198    Helper for ast_for_comprehension().
1199 */
1200 
1201 static int
count_comp_ifs(struct compiling * c,const node * n)1202 count_comp_ifs(struct compiling *c, const node *n)
1203 {
1204     int n_ifs = 0;
1205 
1206     while (1) {
1207         REQ(n, comp_iter);
1208         if (TYPE(CHILD(n, 0)) == comp_for)
1209             return n_ifs;
1210         n = CHILD(n, 0);
1211         REQ(n, comp_if);
1212         n_ifs++;
1213         if (NCH(n) == 2)
1214             return n_ifs;
1215         n = CHILD(n, 2);
1216     }
1217 }
1218 
1219 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1220 ast_for_comprehension(struct compiling *c, const node *n)
1221 {
1222     int i, n_fors;
1223     asdl_seq *comps;
1224 
1225     n_fors = count_comp_fors(c, n);
1226     if (n_fors == -1)
1227         return NULL;
1228 
1229     comps = asdl_seq_new(n_fors, c->c_arena);
1230     if (!comps)
1231         return NULL;
1232 
1233     for (i = 0; i < n_fors; i++) {
1234         comprehension_ty comp;
1235         asdl_seq *t;
1236         expr_ty expression, first;
1237         node *for_ch;
1238 
1239         REQ(n, comp_for);
1240 
1241         for_ch = CHILD(n, 1);
1242         t = ast_for_exprlist(c, for_ch, Store);
1243         if (!t)
1244             return NULL;
1245         expression = ast_for_expr(c, CHILD(n, 3));
1246         if (!expression)
1247             return NULL;
1248 
1249         /* Check the # of children rather than the length of t, since
1250            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1251         first = (expr_ty)asdl_seq_GET(t, 0);
1252         if (NCH(for_ch) == 1)
1253             comp = comprehension(first, expression, NULL, c->c_arena);
1254         else
1255             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1256                                      c->c_arena),
1257                                expression, NULL, c->c_arena);
1258         if (!comp)
1259             return NULL;
1260 
1261         if (NCH(n) == 5) {
1262             int j, n_ifs;
1263             asdl_seq *ifs;
1264 
1265             n = CHILD(n, 4);
1266             n_ifs = count_comp_ifs(c, n);
1267             if (n_ifs == -1)
1268                 return NULL;
1269 
1270             ifs = asdl_seq_new(n_ifs, c->c_arena);
1271             if (!ifs)
1272                 return NULL;
1273 
1274             for (j = 0; j < n_ifs; j++) {
1275                 REQ(n, comp_iter);
1276                 n = CHILD(n, 0);
1277                 REQ(n, comp_if);
1278 
1279                 expression = ast_for_expr(c, CHILD(n, 1));
1280                 if (!expression)
1281                     return NULL;
1282                 asdl_seq_SET(ifs, j, expression);
1283                 if (NCH(n) == 3)
1284                     n = CHILD(n, 2);
1285             }
1286             /* on exit, must guarantee that n is a comp_for */
1287             if (TYPE(n) == comp_iter)
1288                 n = CHILD(n, 0);
1289             comp->ifs = ifs;
1290         }
1291         asdl_seq_SET(comps, i, comp);
1292     }
1293     return comps;
1294 }
1295 
1296 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1297 ast_for_itercomp(struct compiling *c, const node *n, int type)
1298 {
1299     expr_ty elt;
1300     asdl_seq *comps;
1301 
1302     assert(NCH(n) > 1);
1303 
1304     elt = ast_for_expr(c, CHILD(n, 0));
1305     if (!elt)
1306         return NULL;
1307 
1308     comps = ast_for_comprehension(c, CHILD(n, 1));
1309     if (!comps)
1310         return NULL;
1311 
1312     if (type == COMP_GENEXP)
1313         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1314     else if (type == COMP_SETCOMP)
1315         return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1316     else
1317         /* Should never happen */
1318         return NULL;
1319 }
1320 
1321 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1322 ast_for_dictcomp(struct compiling *c, const node *n)
1323 {
1324     expr_ty key, value;
1325     asdl_seq *comps;
1326 
1327     assert(NCH(n) > 3);
1328     REQ(CHILD(n, 1), COLON);
1329 
1330     key = ast_for_expr(c, CHILD(n, 0));
1331     if (!key)
1332         return NULL;
1333 
1334     value = ast_for_expr(c, CHILD(n, 2));
1335     if (!value)
1336         return NULL;
1337 
1338     comps = ast_for_comprehension(c, CHILD(n, 3));
1339     if (!comps)
1340         return NULL;
1341 
1342     return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1343 }
1344 
1345 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1346 ast_for_genexp(struct compiling *c, const node *n)
1347 {
1348     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1349     return ast_for_itercomp(c, n, COMP_GENEXP);
1350 }
1351 
1352 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1353 ast_for_setcomp(struct compiling *c, const node *n)
1354 {
1355     assert(TYPE(n) == (dictorsetmaker));
1356     return ast_for_itercomp(c, n, COMP_SETCOMP);
1357 }
1358 
1359 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1360 ast_for_atom(struct compiling *c, const node *n)
1361 {
1362     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1363        | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1364     */
1365     node *ch = CHILD(n, 0);
1366 
1367     switch (TYPE(ch)) {
1368     case NAME: {
1369         /* All names start in Load context, but may later be
1370            changed. */
1371         PyObject *name = NEW_IDENTIFIER(ch);
1372         if (!name)
1373             return NULL;
1374         return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1375     }
1376     case STRING: {
1377         PyObject *str = parsestrplus(c, n);
1378         if (!str) {
1379 #ifdef Py_USING_UNICODE
1380             if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1381                 PyObject *type, *value, *tback, *errstr;
1382                 PyErr_Fetch(&type, &value, &tback);
1383                 errstr = PyObject_Str(value);
1384                 if (errstr) {
1385                     char *s = "";
1386                     char buf[128];
1387                     s = PyString_AsString(errstr);
1388                     PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1389                     ast_error(n, buf);
1390                     Py_DECREF(errstr);
1391                 } else {
1392                     ast_error(n, "(unicode error) unknown error");
1393                 }
1394                 Py_DECREF(type);
1395                 Py_DECREF(value);
1396                 Py_XDECREF(tback);
1397             }
1398 #endif
1399             return NULL;
1400         }
1401         PyArena_AddPyObject(c->c_arena, str);
1402         return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1403     }
1404     case NUMBER: {
1405         PyObject *pynum = parsenumber(c, STR(ch));
1406         if (!pynum)
1407             return NULL;
1408 
1409         PyArena_AddPyObject(c->c_arena, pynum);
1410         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1411     }
1412     case LPAR: /* some parenthesized expressions */
1413         ch = CHILD(n, 1);
1414 
1415         if (TYPE(ch) == RPAR)
1416             return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1417 
1418         if (TYPE(ch) == yield_expr)
1419             return ast_for_expr(c, ch);
1420 
1421         return ast_for_testlist_comp(c, ch);
1422     case LSQB: /* list (or list comprehension) */
1423         ch = CHILD(n, 1);
1424 
1425         if (TYPE(ch) == RSQB)
1426             return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1427 
1428         REQ(ch, listmaker);
1429         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1430             asdl_seq *elts = seq_for_testlist(c, ch);
1431             if (!elts)
1432                 return NULL;
1433 
1434             return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1435         }
1436         else
1437             return ast_for_listcomp(c, ch);
1438     case LBRACE: {
1439         /* dictorsetmaker:
1440          *    (test ':' test (comp_for | (',' test ':' test)* [','])) |
1441          *    (test (comp_for | (',' test)* [',']))
1442          */
1443         int i, size;
1444         asdl_seq *keys, *values;
1445 
1446         ch = CHILD(n, 1);
1447         if (TYPE(ch) == RBRACE) {
1448             /* it's an empty dict */
1449             return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1450         } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1451             /* it's a simple set */
1452             asdl_seq *elts;
1453             size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1454             elts = asdl_seq_new(size, c->c_arena);
1455             if (!elts)
1456                 return NULL;
1457             for (i = 0; i < NCH(ch); i += 2) {
1458                 expr_ty expression;
1459                 expression = ast_for_expr(c, CHILD(ch, i));
1460                 if (!expression)
1461                     return NULL;
1462                 asdl_seq_SET(elts, i / 2, expression);
1463             }
1464             return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1465         } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1466             /* it's a set comprehension */
1467             return ast_for_setcomp(c, ch);
1468         } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1469             return ast_for_dictcomp(c, ch);
1470         } else {
1471             /* it's a dict */
1472             size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1473             keys = asdl_seq_new(size, c->c_arena);
1474             if (!keys)
1475                 return NULL;
1476 
1477             values = asdl_seq_new(size, c->c_arena);
1478             if (!values)
1479                 return NULL;
1480 
1481             for (i = 0; i < NCH(ch); i += 4) {
1482                 expr_ty expression;
1483 
1484                 expression = ast_for_expr(c, CHILD(ch, i));
1485                 if (!expression)
1486                     return NULL;
1487 
1488                 asdl_seq_SET(keys, i / 4, expression);
1489 
1490                 expression = ast_for_expr(c, CHILD(ch, i + 2));
1491                 if (!expression)
1492                     return NULL;
1493 
1494                 asdl_seq_SET(values, i / 4, expression);
1495             }
1496             return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1497         }
1498     }
1499     case BACKQUOTE: { /* repr */
1500         expr_ty expression;
1501         if (Py_Py3kWarningFlag &&
1502             !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1503             return NULL;
1504         expression = ast_for_testlist(c, CHILD(n, 1));
1505         if (!expression)
1506             return NULL;
1507 
1508         return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1509     }
1510     default:
1511         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1512         return NULL;
1513     }
1514 }
1515 
1516 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1517 ast_for_slice(struct compiling *c, const node *n)
1518 {
1519     node *ch;
1520     expr_ty lower = NULL, upper = NULL, step = NULL;
1521 
1522     REQ(n, subscript);
1523 
1524     /*
1525        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1526        sliceop: ':' [test]
1527     */
1528     ch = CHILD(n, 0);
1529     if (TYPE(ch) == DOT)
1530         return Ellipsis(c->c_arena);
1531 
1532     if (NCH(n) == 1 && TYPE(ch) == test) {
1533         /* 'step' variable hold no significance in terms of being used over
1534            other vars */
1535         step = ast_for_expr(c, ch);
1536         if (!step)
1537             return NULL;
1538 
1539         return Index(step, c->c_arena);
1540     }
1541 
1542     if (TYPE(ch) == test) {
1543         lower = ast_for_expr(c, ch);
1544         if (!lower)
1545             return NULL;
1546     }
1547 
1548     /* If there's an upper bound it's in the second or third position. */
1549     if (TYPE(ch) == COLON) {
1550         if (NCH(n) > 1) {
1551             node *n2 = CHILD(n, 1);
1552 
1553             if (TYPE(n2) == test) {
1554                 upper = ast_for_expr(c, n2);
1555                 if (!upper)
1556                     return NULL;
1557             }
1558         }
1559     } else if (NCH(n) > 2) {
1560         node *n2 = CHILD(n, 2);
1561 
1562         if (TYPE(n2) == test) {
1563             upper = ast_for_expr(c, n2);
1564             if (!upper)
1565                 return NULL;
1566         }
1567     }
1568 
1569     ch = CHILD(n, NCH(n) - 1);
1570     if (TYPE(ch) == sliceop) {
1571         if (NCH(ch) == 1) {
1572             /*
1573               This is an extended slice (ie "x[::]") with no expression in the
1574               step field. We set this literally to "None" in order to
1575               disambiguate it from x[:]. (The interpreter might have to call
1576               __getslice__ for x[:], but it must call __getitem__ for x[::].)
1577             */
1578             identifier none = new_identifier("None", c->c_arena);
1579             if (!none)
1580                 return NULL;
1581             ch = CHILD(ch, 0);
1582             step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1583             if (!step)
1584                 return NULL;
1585         } else {
1586             ch = CHILD(ch, 1);
1587             if (TYPE(ch) == test) {
1588                 step = ast_for_expr(c, ch);
1589                 if (!step)
1590                     return NULL;
1591             }
1592         }
1593     }
1594 
1595     return Slice(lower, upper, step, c->c_arena);
1596 }
1597 
1598 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1599 ast_for_binop(struct compiling *c, const node *n)
1600 {
1601         /* Must account for a sequence of expressions.
1602            How should A op B op C by represented?
1603            BinOp(BinOp(A, op, B), op, C).
1604         */
1605 
1606         int i, nops;
1607         expr_ty expr1, expr2, result;
1608         operator_ty newoperator;
1609 
1610         expr1 = ast_for_expr(c, CHILD(n, 0));
1611         if (!expr1)
1612             return NULL;
1613 
1614         expr2 = ast_for_expr(c, CHILD(n, 2));
1615         if (!expr2)
1616             return NULL;
1617 
1618         newoperator = get_operator(CHILD(n, 1));
1619         if (!newoperator)
1620             return NULL;
1621 
1622         result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1623                        c->c_arena);
1624         if (!result)
1625             return NULL;
1626 
1627         nops = (NCH(n) - 1) / 2;
1628         for (i = 1; i < nops; i++) {
1629                 expr_ty tmp_result, tmp;
1630                 const node* next_oper = CHILD(n, i * 2 + 1);
1631 
1632                 newoperator = get_operator(next_oper);
1633                 if (!newoperator)
1634                     return NULL;
1635 
1636                 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1637                 if (!tmp)
1638                     return NULL;
1639 
1640                 tmp_result = BinOp(result, newoperator, tmp,
1641                                    LINENO(next_oper), next_oper->n_col_offset,
1642                                    c->c_arena);
1643                 if (!tmp_result)
1644                         return NULL;
1645                 result = tmp_result;
1646         }
1647         return result;
1648 }
1649 
1650 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1651 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1652 {
1653     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1654        subscriptlist: subscript (',' subscript)* [',']
1655        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1656      */
1657     REQ(n, trailer);
1658     if (TYPE(CHILD(n, 0)) == LPAR) {
1659         if (NCH(n) == 2)
1660             return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1661                         n->n_col_offset, c->c_arena);
1662         else
1663             return ast_for_call(c, CHILD(n, 1), left_expr);
1664     }
1665     else if (TYPE(CHILD(n, 0)) == DOT ) {
1666         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1667         if (!attr_id)
1668             return NULL;
1669         return Attribute(left_expr, attr_id, Load,
1670                          LINENO(n), n->n_col_offset, c->c_arena);
1671     }
1672     else {
1673         REQ(CHILD(n, 0), LSQB);
1674         REQ(CHILD(n, 2), RSQB);
1675         n = CHILD(n, 1);
1676         if (NCH(n) == 1) {
1677             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1678             if (!slc)
1679                 return NULL;
1680             return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1681                              c->c_arena);
1682         }
1683         else {
1684             /* The grammar is ambiguous here. The ambiguity is resolved
1685                by treating the sequence as a tuple literal if there are
1686                no slice features.
1687             */
1688             int j;
1689             slice_ty slc;
1690             expr_ty e;
1691             bool simple = true;
1692             asdl_seq *slices, *elts;
1693             slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1694             if (!slices)
1695                 return NULL;
1696             for (j = 0; j < NCH(n); j += 2) {
1697                 slc = ast_for_slice(c, CHILD(n, j));
1698                 if (!slc)
1699                     return NULL;
1700                 if (slc->kind != Index_kind)
1701                     simple = false;
1702                 asdl_seq_SET(slices, j / 2, slc);
1703             }
1704             if (!simple) {
1705                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1706                                  Load, LINENO(n), n->n_col_offset, c->c_arena);
1707             }
1708             /* extract Index values and put them in a Tuple */
1709             elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1710             if (!elts)
1711                 return NULL;
1712             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1713                 slc = (slice_ty)asdl_seq_GET(slices, j);
1714                 assert(slc->kind == Index_kind  && slc->v.Index.value);
1715                 asdl_seq_SET(elts, j, slc->v.Index.value);
1716             }
1717             e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1718             if (!e)
1719                 return NULL;
1720             return Subscript(left_expr, Index(e, c->c_arena),
1721                              Load, LINENO(n), n->n_col_offset, c->c_arena);
1722         }
1723     }
1724 }
1725 
1726 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1727 ast_for_factor(struct compiling *c, const node *n)
1728 {
1729     node *pfactor, *ppower, *patom, *pnum;
1730     expr_ty expression;
1731 
1732     /* If the unary - operator is applied to a constant, don't generate
1733        a UNARY_NEGATIVE opcode.  Just store the approriate value as a
1734        constant.  The peephole optimizer already does something like
1735        this but it doesn't handle the case where the constant is
1736        (sys.maxint - 1).  In that case, we want a PyIntObject, not a
1737        PyLongObject.
1738     */
1739     if (TYPE(CHILD(n, 0)) == MINUS &&
1740         NCH(n) == 2 &&
1741         TYPE((pfactor = CHILD(n, 1))) == factor &&
1742         NCH(pfactor) == 1 &&
1743         TYPE((ppower = CHILD(pfactor, 0))) == power &&
1744         NCH(ppower) == 1 &&
1745         TYPE((patom = CHILD(ppower, 0))) == atom &&
1746         TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1747         PyObject *pynum;
1748         char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1749         if (s == NULL)
1750             return NULL;
1751         s[0] = '-';
1752         strcpy(s + 1, STR(pnum));
1753         pynum = parsenumber(c, s);
1754         PyObject_FREE(s);
1755         if (!pynum)
1756             return NULL;
1757 
1758         PyArena_AddPyObject(c->c_arena, pynum);
1759         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1760     }
1761 
1762     expression = ast_for_expr(c, CHILD(n, 1));
1763     if (!expression)
1764         return NULL;
1765 
1766     switch (TYPE(CHILD(n, 0))) {
1767         case PLUS:
1768             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1769                            c->c_arena);
1770         case MINUS:
1771             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1772                            c->c_arena);
1773         case TILDE:
1774             return UnaryOp(Invert, expression, LINENO(n),
1775                            n->n_col_offset, c->c_arena);
1776     }
1777     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1778                  TYPE(CHILD(n, 0)));
1779     return NULL;
1780 }
1781 
1782 static expr_ty
ast_for_power(struct compiling * c,const node * n)1783 ast_for_power(struct compiling *c, const node *n)
1784 {
1785     /* power: atom trailer* ('**' factor)*
1786      */
1787     int i;
1788     expr_ty e, tmp;
1789     REQ(n, power);
1790     e = ast_for_atom(c, CHILD(n, 0));
1791     if (!e)
1792         return NULL;
1793     if (NCH(n) == 1)
1794         return e;
1795     for (i = 1; i < NCH(n); i++) {
1796         node *ch = CHILD(n, i);
1797         if (TYPE(ch) != trailer)
1798             break;
1799         tmp = ast_for_trailer(c, ch, e);
1800         if (!tmp)
1801             return NULL;
1802         tmp->lineno = e->lineno;
1803         tmp->col_offset = e->col_offset;
1804         e = tmp;
1805     }
1806     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1807         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1808         if (!f)
1809             return NULL;
1810         tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1811         if (!tmp)
1812             return NULL;
1813         e = tmp;
1814     }
1815     return e;
1816 }
1817 
1818 /* Do not name a variable 'expr'!  Will cause a compile error.
1819 */
1820 
1821 static expr_ty
ast_for_expr(struct compiling * c,const node * n)1822 ast_for_expr(struct compiling *c, const node *n)
1823 {
1824     /* handle the full range of simple expressions
1825        test: or_test ['if' or_test 'else' test] | lambdef
1826        or_test: and_test ('or' and_test)*
1827        and_test: not_test ('and' not_test)*
1828        not_test: 'not' not_test | comparison
1829        comparison: expr (comp_op expr)*
1830        expr: xor_expr ('|' xor_expr)*
1831        xor_expr: and_expr ('^' and_expr)*
1832        and_expr: shift_expr ('&' shift_expr)*
1833        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1834        arith_expr: term (('+'|'-') term)*
1835        term: factor (('*'|'/'|'%'|'//') factor)*
1836        factor: ('+'|'-'|'~') factor | power
1837        power: atom trailer* ('**' factor)*
1838 
1839        As well as modified versions that exist for backward compatibility,
1840        to explicitly allow:
1841        [ x for x in lambda: 0, lambda: 1 ]
1842        (which would be ambiguous without these extra rules)
1843 
1844        old_test: or_test | old_lambdef
1845        old_lambdef: 'lambda' [vararglist] ':' old_test
1846 
1847     */
1848 
1849     asdl_seq *seq;
1850     int i;
1851 
1852  loop:
1853     switch (TYPE(n)) {
1854         case test:
1855         case old_test:
1856             if (TYPE(CHILD(n, 0)) == lambdef ||
1857                 TYPE(CHILD(n, 0)) == old_lambdef)
1858                 return ast_for_lambdef(c, CHILD(n, 0));
1859             else if (NCH(n) > 1)
1860                 return ast_for_ifexpr(c, n);
1861             /* Fallthrough */
1862         case or_test:
1863         case and_test:
1864             if (NCH(n) == 1) {
1865                 n = CHILD(n, 0);
1866                 goto loop;
1867             }
1868             seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1869             if (!seq)
1870                 return NULL;
1871             for (i = 0; i < NCH(n); i += 2) {
1872                 expr_ty e = ast_for_expr(c, CHILD(n, i));
1873                 if (!e)
1874                     return NULL;
1875                 asdl_seq_SET(seq, i / 2, e);
1876             }
1877             if (!strcmp(STR(CHILD(n, 1)), "and"))
1878                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1879                               c->c_arena);
1880             assert(!strcmp(STR(CHILD(n, 1)), "or"));
1881             return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1882         case not_test:
1883             if (NCH(n) == 1) {
1884                 n = CHILD(n, 0);
1885                 goto loop;
1886             }
1887             else {
1888                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1889                 if (!expression)
1890                     return NULL;
1891 
1892                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1893                                c->c_arena);
1894             }
1895         case comparison:
1896             if (NCH(n) == 1) {
1897                 n = CHILD(n, 0);
1898                 goto loop;
1899             }
1900             else {
1901                 expr_ty expression;
1902                 asdl_int_seq *ops;
1903                 asdl_seq *cmps;
1904                 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1905                 if (!ops)
1906                     return NULL;
1907                 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1908                 if (!cmps) {
1909                     return NULL;
1910                 }
1911                 for (i = 1; i < NCH(n); i += 2) {
1912                     cmpop_ty newoperator;
1913 
1914                     newoperator = ast_for_comp_op(c, CHILD(n, i));
1915                     if (!newoperator) {
1916                         return NULL;
1917                     }
1918 
1919                     expression = ast_for_expr(c, CHILD(n, i + 1));
1920                     if (!expression) {
1921                         return NULL;
1922                     }
1923 
1924                     asdl_seq_SET(ops, i / 2, newoperator);
1925                     asdl_seq_SET(cmps, i / 2, expression);
1926                 }
1927                 expression = ast_for_expr(c, CHILD(n, 0));
1928                 if (!expression) {
1929                     return NULL;
1930                 }
1931 
1932                 return Compare(expression, ops, cmps, LINENO(n),
1933                                n->n_col_offset, c->c_arena);
1934             }
1935             break;
1936 
1937         /* The next five cases all handle BinOps.  The main body of code
1938            is the same in each case, but the switch turned inside out to
1939            reuse the code for each type of operator.
1940          */
1941         case expr:
1942         case xor_expr:
1943         case and_expr:
1944         case shift_expr:
1945         case arith_expr:
1946         case term:
1947             if (NCH(n) == 1) {
1948                 n = CHILD(n, 0);
1949                 goto loop;
1950             }
1951             return ast_for_binop(c, n);
1952         case yield_expr: {
1953             expr_ty exp = NULL;
1954             if (NCH(n) == 2) {
1955                 exp = ast_for_testlist(c, CHILD(n, 1));
1956                 if (!exp)
1957                     return NULL;
1958             }
1959             return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1960         }
1961         case factor:
1962             if (NCH(n) == 1) {
1963                 n = CHILD(n, 0);
1964                 goto loop;
1965             }
1966             return ast_for_factor(c, n);
1967         case power:
1968             return ast_for_power(c, n);
1969         default:
1970             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1971             return NULL;
1972     }
1973     /* should never get here unless if error is set */
1974     return NULL;
1975 }
1976 
1977 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)1978 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1979 {
1980     /*
1981       arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1982                | '**' test)
1983       argument: [test '='] test [comp_for]        # Really [keyword '='] test
1984     */
1985 
1986     int i, nargs, nkeywords, ngens;
1987     asdl_seq *args;
1988     asdl_seq *keywords;
1989     expr_ty vararg = NULL, kwarg = NULL;
1990 
1991     REQ(n, arglist);
1992 
1993     nargs = 0;
1994     nkeywords = 0;
1995     ngens = 0;
1996     for (i = 0; i < NCH(n); i++) {
1997         node *ch = CHILD(n, i);
1998         if (TYPE(ch) == argument) {
1999             if (NCH(ch) == 1)
2000                 nargs++;
2001             else if (TYPE(CHILD(ch, 1)) == comp_for)
2002                 ngens++;
2003             else
2004                 nkeywords++;
2005         }
2006     }
2007     if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2008         ast_error(n, "Generator expression must be parenthesized "
2009                   "if not sole argument");
2010         return NULL;
2011     }
2012 
2013     if (nargs + nkeywords + ngens > 255) {
2014       ast_error(n, "more than 255 arguments");
2015       return NULL;
2016     }
2017 
2018     args = asdl_seq_new(nargs + ngens, c->c_arena);
2019     if (!args)
2020         return NULL;
2021     keywords = asdl_seq_new(nkeywords, c->c_arena);
2022     if (!keywords)
2023         return NULL;
2024     nargs = 0;
2025     nkeywords = 0;
2026     for (i = 0; i < NCH(n); i++) {
2027         node *ch = CHILD(n, i);
2028         if (TYPE(ch) == argument) {
2029             expr_ty e;
2030             if (NCH(ch) == 1) {
2031                 if (nkeywords) {
2032                     ast_error(CHILD(ch, 0),
2033                               "non-keyword arg after keyword arg");
2034                     return NULL;
2035                 }
2036                 if (vararg) {
2037                     ast_error(CHILD(ch, 0),
2038                               "only named arguments may follow *expression");
2039                     return NULL;
2040                 }
2041                 e = ast_for_expr(c, CHILD(ch, 0));
2042                 if (!e)
2043                     return NULL;
2044                 asdl_seq_SET(args, nargs++, e);
2045             }
2046             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2047                 e = ast_for_genexp(c, ch);
2048                 if (!e)
2049                     return NULL;
2050                 asdl_seq_SET(args, nargs++, e);
2051             }
2052             else {
2053                 keyword_ty kw;
2054                 identifier key;
2055                 int k;
2056                 char *tmp;
2057 
2058                 /* CHILD(ch, 0) is test, but must be an identifier? */
2059                 e = ast_for_expr(c, CHILD(ch, 0));
2060                 if (!e)
2061                     return NULL;
2062                 /* f(lambda x: x[0] = 3) ends up getting parsed with
2063                  * LHS test = lambda x: x[0], and RHS test = 3.
2064                  * SF bug 132313 points out that complaining about a keyword
2065                  * then is very confusing.
2066                  */
2067                 if (e->kind == Lambda_kind) {
2068                     ast_error(CHILD(ch, 0),
2069                               "lambda cannot contain assignment");
2070                     return NULL;
2071                 } else if (e->kind != Name_kind) {
2072                     ast_error(CHILD(ch, 0), "keyword can't be an expression");
2073                     return NULL;
2074                 }
2075                 key = e->v.Name.id;
2076                 if (!forbidden_check(c, CHILD(ch, 0), PyBytes_AS_STRING(key)))
2077                     return NULL;
2078                 for (k = 0; k < nkeywords; k++) {
2079                     tmp = PyString_AS_STRING(
2080                         ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2081                     if (!strcmp(tmp, PyString_AS_STRING(key))) {
2082                         ast_error(CHILD(ch, 0), "keyword argument repeated");
2083                         return NULL;
2084                     }
2085                 }
2086                 e = ast_for_expr(c, CHILD(ch, 2));
2087                 if (!e)
2088                     return NULL;
2089                 kw = keyword(key, e, c->c_arena);
2090                 if (!kw)
2091                     return NULL;
2092                 asdl_seq_SET(keywords, nkeywords++, kw);
2093             }
2094         }
2095         else if (TYPE(ch) == STAR) {
2096             vararg = ast_for_expr(c, CHILD(n, i+1));
2097             if (!vararg)
2098                 return NULL;
2099             i++;
2100         }
2101         else if (TYPE(ch) == DOUBLESTAR) {
2102             kwarg = ast_for_expr(c, CHILD(n, i+1));
2103             if (!kwarg)
2104                 return NULL;
2105             i++;
2106         }
2107     }
2108 
2109     return Call(func, args, keywords, vararg, kwarg, func->lineno,
2110                 func->col_offset, c->c_arena);
2111 }
2112 
2113 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2114 ast_for_testlist(struct compiling *c, const node* n)
2115 {
2116     /* testlist_comp: test (',' test)* [','] */
2117     /* testlist: test (',' test)* [','] */
2118     /* testlist_safe: test (',' test)+ [','] */
2119     /* testlist1: test (',' test)* */
2120     assert(NCH(n) > 0);
2121     if (TYPE(n) == testlist_comp) {
2122         if (NCH(n) > 1)
2123             assert(TYPE(CHILD(n, 1)) != comp_for);
2124     }
2125     else {
2126         assert(TYPE(n) == testlist ||
2127                TYPE(n) == testlist_safe ||
2128                TYPE(n) == testlist1);
2129     }
2130     if (NCH(n) == 1)
2131         return ast_for_expr(c, CHILD(n, 0));
2132     else {
2133         asdl_seq *tmp = seq_for_testlist(c, n);
2134         if (!tmp)
2135             return NULL;
2136         return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2137     }
2138 }
2139 
2140 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2141 ast_for_testlist_comp(struct compiling *c, const node* n)
2142 {
2143     /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2144     /* argument: test [ comp_for ] */
2145     assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2146     if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2147         return ast_for_genexp(c, n);
2148     return ast_for_testlist(c, n);
2149 }
2150 
2151 /* like ast_for_testlist() but returns a sequence */
2152 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2153 ast_for_class_bases(struct compiling *c, const node* n)
2154 {
2155     /* testlist: test (',' test)* [','] */
2156     assert(NCH(n) > 0);
2157     REQ(n, testlist);
2158     if (NCH(n) == 1) {
2159         expr_ty base;
2160         asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2161         if (!bases)
2162             return NULL;
2163         base = ast_for_expr(c, CHILD(n, 0));
2164         if (!base)
2165             return NULL;
2166         asdl_seq_SET(bases, 0, base);
2167         return bases;
2168     }
2169 
2170     return seq_for_testlist(c, n);
2171 }
2172 
2173 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2174 ast_for_expr_stmt(struct compiling *c, const node *n)
2175 {
2176     REQ(n, expr_stmt);
2177     /* expr_stmt: testlist (augassign (yield_expr|testlist)
2178                 | ('=' (yield_expr|testlist))*)
2179        testlist: test (',' test)* [',']
2180        augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2181                 | '<<=' | '>>=' | '**=' | '//='
2182        test: ... here starts the operator precedence dance
2183      */
2184 
2185     if (NCH(n) == 1) {
2186         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2187         if (!e)
2188             return NULL;
2189 
2190         return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2191     }
2192     else if (TYPE(CHILD(n, 1)) == augassign) {
2193         expr_ty expr1, expr2;
2194         operator_ty newoperator;
2195         node *ch = CHILD(n, 0);
2196 
2197         expr1 = ast_for_testlist(c, ch);
2198         if (!expr1)
2199             return NULL;
2200         if(!set_context(c, expr1, Store, ch))
2201             return NULL;
2202         /* set_context checks that most expressions are not the left side.
2203           Augmented assignments can only have a name, a subscript, or an
2204           attribute on the left, though, so we have to explicitly check for
2205           those. */
2206         switch (expr1->kind) {
2207             case Name_kind:
2208             case Attribute_kind:
2209             case Subscript_kind:
2210                 break;
2211             default:
2212                 ast_error(ch, "illegal expression for augmented assignment");
2213                 return NULL;
2214         }
2215 
2216         ch = CHILD(n, 2);
2217         if (TYPE(ch) == testlist)
2218             expr2 = ast_for_testlist(c, ch);
2219         else
2220             expr2 = ast_for_expr(c, ch);
2221         if (!expr2)
2222             return NULL;
2223 
2224         newoperator = ast_for_augassign(c, CHILD(n, 1));
2225         if (!newoperator)
2226             return NULL;
2227 
2228         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2229                          c->c_arena);
2230     }
2231     else {
2232         int i;
2233         asdl_seq *targets;
2234         node *value;
2235         expr_ty expression;
2236 
2237         /* a normal assignment */
2238         REQ(CHILD(n, 1), EQUAL);
2239         targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2240         if (!targets)
2241             return NULL;
2242         for (i = 0; i < NCH(n) - 2; i += 2) {
2243             expr_ty e;
2244             node *ch = CHILD(n, i);
2245             if (TYPE(ch) == yield_expr) {
2246                 ast_error(ch, "assignment to yield expression not possible");
2247                 return NULL;
2248             }
2249             e = ast_for_testlist(c, ch);
2250             if (!e)
2251                 return NULL;
2252 
2253             /* set context to assign */
2254             if (!set_context(c, e, Store, CHILD(n, i)))
2255                 return NULL;
2256 
2257             asdl_seq_SET(targets, i / 2, e);
2258         }
2259         value = CHILD(n, NCH(n) - 1);
2260         if (TYPE(value) == testlist)
2261             expression = ast_for_testlist(c, value);
2262         else
2263             expression = ast_for_expr(c, value);
2264         if (!expression)
2265             return NULL;
2266         return Assign(targets, expression, LINENO(n), n->n_col_offset,
2267                       c->c_arena);
2268     }
2269 }
2270 
2271 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2272 ast_for_print_stmt(struct compiling *c, const node *n)
2273 {
2274     /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2275                              | '>>' test [ (',' test)+ [','] ] )
2276      */
2277     expr_ty dest = NULL, expression;
2278     asdl_seq *seq = NULL;
2279     bool nl;
2280     int i, j, values_count, start = 1;
2281 
2282     REQ(n, print_stmt);
2283     if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2284         dest = ast_for_expr(c, CHILD(n, 2));
2285         if (!dest)
2286             return NULL;
2287         start = 4;
2288     }
2289     values_count = (NCH(n) + 1 - start) / 2;
2290     if (values_count) {
2291         seq = asdl_seq_new(values_count, c->c_arena);
2292         if (!seq)
2293             return NULL;
2294         for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2295             expression = ast_for_expr(c, CHILD(n, i));
2296             if (!expression)
2297                 return NULL;
2298             asdl_seq_SET(seq, j, expression);
2299         }
2300     }
2301     nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2302     return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2303 }
2304 
2305 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2306 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2307 {
2308     asdl_seq *seq;
2309     int i;
2310     expr_ty e;
2311 
2312     REQ(n, exprlist);
2313 
2314     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2315     if (!seq)
2316         return NULL;
2317     for (i = 0; i < NCH(n); i += 2) {
2318         e = ast_for_expr(c, CHILD(n, i));
2319         if (!e)
2320             return NULL;
2321         asdl_seq_SET(seq, i / 2, e);
2322         if (context && !set_context(c, e, context, CHILD(n, i)))
2323             return NULL;
2324     }
2325     return seq;
2326 }
2327 
2328 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2329 ast_for_del_stmt(struct compiling *c, const node *n)
2330 {
2331     asdl_seq *expr_list;
2332 
2333     /* del_stmt: 'del' exprlist */
2334     REQ(n, del_stmt);
2335 
2336     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2337     if (!expr_list)
2338         return NULL;
2339     return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2340 }
2341 
2342 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2343 ast_for_flow_stmt(struct compiling *c, const node *n)
2344 {
2345     /*
2346       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2347                  | yield_stmt
2348       break_stmt: 'break'
2349       continue_stmt: 'continue'
2350       return_stmt: 'return' [testlist]
2351       yield_stmt: yield_expr
2352       yield_expr: 'yield' testlist
2353       raise_stmt: 'raise' [test [',' test [',' test]]]
2354     */
2355     node *ch;
2356 
2357     REQ(n, flow_stmt);
2358     ch = CHILD(n, 0);
2359     switch (TYPE(ch)) {
2360         case break_stmt:
2361             return Break(LINENO(n), n->n_col_offset, c->c_arena);
2362         case continue_stmt:
2363             return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2364         case yield_stmt: { /* will reduce to yield_expr */
2365             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2366             if (!exp)
2367                 return NULL;
2368             return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2369         }
2370         case return_stmt:
2371             if (NCH(ch) == 1)
2372                 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2373             else {
2374                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2375                 if (!expression)
2376                     return NULL;
2377                 return Return(expression, LINENO(n), n->n_col_offset,
2378                               c->c_arena);
2379             }
2380         case raise_stmt:
2381             if (NCH(ch) == 1)
2382                 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2383                              c->c_arena);
2384             else if (NCH(ch) == 2) {
2385                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2386                 if (!expression)
2387                     return NULL;
2388                 return Raise(expression, NULL, NULL, LINENO(n),
2389                              n->n_col_offset, c->c_arena);
2390             }
2391             else if (NCH(ch) == 4) {
2392                 expr_ty expr1, expr2;
2393 
2394                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2395                 if (!expr1)
2396                     return NULL;
2397                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2398                 if (!expr2)
2399                     return NULL;
2400 
2401                 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2402                              c->c_arena);
2403             }
2404             else if (NCH(ch) == 6) {
2405                 expr_ty expr1, expr2, expr3;
2406 
2407                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2408                 if (!expr1)
2409                     return NULL;
2410                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2411                 if (!expr2)
2412                     return NULL;
2413                 expr3 = ast_for_expr(c, CHILD(ch, 5));
2414                 if (!expr3)
2415                     return NULL;
2416 
2417                 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2418                              c->c_arena);
2419             }
2420         default:
2421             PyErr_Format(PyExc_SystemError,
2422                          "unexpected flow_stmt: %d", TYPE(ch));
2423             return NULL;
2424     }
2425 
2426     PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2427     return NULL;
2428 }
2429 
2430 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2431 alias_for_import_name(struct compiling *c, const node *n, int store)
2432 {
2433     /*
2434       import_as_name: NAME ['as' NAME]
2435       dotted_as_name: dotted_name ['as' NAME]
2436       dotted_name: NAME ('.' NAME)*
2437     */
2438     PyObject *str, *name;
2439 
2440  loop:
2441     switch (TYPE(n)) {
2442          case import_as_name: {
2443             node *name_node = CHILD(n, 0);
2444             str = NULL;
2445             if (NCH(n) == 3) {
2446                 node *str_node = CHILD(n, 2);
2447                 if (store && !forbidden_check(c, str_node, STR(str_node)))
2448                     return NULL;
2449                 str = NEW_IDENTIFIER(str_node);
2450                 if (!str)
2451                     return NULL;
2452             }
2453             else {
2454                 if (!forbidden_check(c, name_node, STR(name_node)))
2455                     return NULL;
2456             }
2457             name = NEW_IDENTIFIER(name_node);
2458             if (!name)
2459                 return NULL;
2460             return alias(name, str, c->c_arena);
2461         }
2462         case dotted_as_name:
2463             if (NCH(n) == 1) {
2464                 n = CHILD(n, 0);
2465                 goto loop;
2466             }
2467             else {
2468                 node *asname_node = CHILD(n, 2);
2469                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2470                 if (!a)
2471                     return NULL;
2472                 assert(!a->asname);
2473                 if (!forbidden_check(c, asname_node, STR(asname_node)))
2474                     return NULL;
2475                 a->asname = NEW_IDENTIFIER(asname_node);
2476                 if (!a->asname)
2477                     return NULL;
2478                 return a;
2479             }
2480             break;
2481         case dotted_name:
2482             if (NCH(n) == 1) {
2483                 node *name_node = CHILD(n, 0);
2484                 if (store && !forbidden_check(c, name_node, STR(name_node)))
2485                     return NULL;
2486                 name = NEW_IDENTIFIER(name_node);
2487                 if (!name)
2488                     return NULL;
2489                 return alias(name, NULL, c->c_arena);
2490             }
2491             else {
2492                 /* Create a string of the form "a.b.c" */
2493                 int i;
2494                 size_t len;
2495                 char *s;
2496 
2497                 len = 0;
2498                 for (i = 0; i < NCH(n); i += 2)
2499                     /* length of string plus one for the dot */
2500                     len += strlen(STR(CHILD(n, i))) + 1;
2501                 len--; /* the last name doesn't have a dot */
2502                 str = PyString_FromStringAndSize(NULL, len);
2503                 if (!str)
2504                     return NULL;
2505                 s = PyString_AS_STRING(str);
2506                 if (!s)
2507                     return NULL;
2508                 for (i = 0; i < NCH(n); i += 2) {
2509                     char *sch = STR(CHILD(n, i));
2510                     strcpy(s, STR(CHILD(n, i)));
2511                     s += strlen(sch);
2512                     *s++ = '.';
2513                 }
2514                 --s;
2515                 *s = '\0';
2516                 PyString_InternInPlace(&str);
2517                 PyArena_AddPyObject(c->c_arena, str);
2518                 return alias(str, NULL, c->c_arena);
2519             }
2520             break;
2521         case STAR:
2522             str = PyString_InternFromString("*");
2523             if (!str)
2524                 return NULL;
2525             PyArena_AddPyObject(c->c_arena, str);
2526             return alias(str, NULL, c->c_arena);
2527         default:
2528             PyErr_Format(PyExc_SystemError,
2529                          "unexpected import name: %d", TYPE(n));
2530             return NULL;
2531     }
2532 
2533     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2534     return NULL;
2535 }
2536 
2537 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)2538 ast_for_import_stmt(struct compiling *c, const node *n)
2539 {
2540     /*
2541       import_stmt: import_name | import_from
2542       import_name: 'import' dotted_as_names
2543       import_from: 'from' ('.'* dotted_name | '.') 'import'
2544                           ('*' | '(' import_as_names ')' | import_as_names)
2545     */
2546     int lineno;
2547     int col_offset;
2548     int i;
2549     asdl_seq *aliases;
2550 
2551     REQ(n, import_stmt);
2552     lineno = LINENO(n);
2553     col_offset = n->n_col_offset;
2554     n = CHILD(n, 0);
2555     if (TYPE(n) == import_name) {
2556         n = CHILD(n, 1);
2557         REQ(n, dotted_as_names);
2558         aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2559         if (!aliases)
2560             return NULL;
2561         for (i = 0; i < NCH(n); i += 2) {
2562             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2563             if (!import_alias)
2564                 return NULL;
2565             asdl_seq_SET(aliases, i / 2, import_alias);
2566         }
2567         return Import(aliases, lineno, col_offset, c->c_arena);
2568     }
2569     else if (TYPE(n) == import_from) {
2570         int n_children;
2571         int idx, ndots = 0;
2572         alias_ty mod = NULL;
2573         identifier modname = NULL;
2574 
2575        /* Count the number of dots (for relative imports) and check for the
2576           optional module name */
2577         for (idx = 1; idx < NCH(n); idx++) {
2578             if (TYPE(CHILD(n, idx)) == dotted_name) {
2579                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
2580                 if (!mod)
2581                     return NULL;
2582                 idx++;
2583                 break;
2584             } else if (TYPE(CHILD(n, idx)) != DOT) {
2585                 break;
2586             }
2587             ndots++;
2588         }
2589         idx++; /* skip over the 'import' keyword */
2590         switch (TYPE(CHILD(n, idx))) {
2591         case STAR:
2592             /* from ... import * */
2593             n = CHILD(n, idx);
2594             n_children = 1;
2595             break;
2596         case LPAR:
2597             /* from ... import (x, y, z) */
2598             n = CHILD(n, idx + 1);
2599             n_children = NCH(n);
2600             break;
2601         case import_as_names:
2602             /* from ... import x, y, z */
2603             n = CHILD(n, idx);
2604             n_children = NCH(n);
2605             if (n_children % 2 == 0) {
2606                 ast_error(n, "trailing comma not allowed without"
2607                              " surrounding parentheses");
2608                 return NULL;
2609             }
2610             break;
2611         default:
2612             ast_error(n, "Unexpected node-type in from-import");
2613             return NULL;
2614         }
2615 
2616         aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2617         if (!aliases)
2618             return NULL;
2619 
2620         /* handle "from ... import *" special b/c there's no children */
2621         if (TYPE(n) == STAR) {
2622             alias_ty import_alias = alias_for_import_name(c, n, 1);
2623             if (!import_alias)
2624                 return NULL;
2625                 asdl_seq_SET(aliases, 0, import_alias);
2626         }
2627         else {
2628             for (i = 0; i < NCH(n); i += 2) {
2629                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2630                 if (!import_alias)
2631                     return NULL;
2632                     asdl_seq_SET(aliases, i / 2, import_alias);
2633             }
2634         }
2635         if (mod != NULL)
2636             modname = mod->name;
2637         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2638                           c->c_arena);
2639     }
2640     PyErr_Format(PyExc_SystemError,
2641                  "unknown import statement: starts with command '%s'",
2642                  STR(CHILD(n, 0)));
2643     return NULL;
2644 }
2645 
2646 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2647 ast_for_global_stmt(struct compiling *c, const node *n)
2648 {
2649     /* global_stmt: 'global' NAME (',' NAME)* */
2650     identifier name;
2651     asdl_seq *s;
2652     int i;
2653 
2654     REQ(n, global_stmt);
2655     s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2656     if (!s)
2657         return NULL;
2658     for (i = 1; i < NCH(n); i += 2) {
2659         name = NEW_IDENTIFIER(CHILD(n, i));
2660         if (!name)
2661             return NULL;
2662         asdl_seq_SET(s, i / 2, name);
2663     }
2664     return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2665 }
2666 
2667 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2668 ast_for_exec_stmt(struct compiling *c, const node *n)
2669 {
2670     expr_ty expr1, globals = NULL, locals = NULL;
2671     int n_children = NCH(n);
2672     if (n_children != 2 && n_children != 4 && n_children != 6) {
2673         PyErr_Format(PyExc_SystemError,
2674                      "poorly formed 'exec' statement: %d parts to statement",
2675                      n_children);
2676         return NULL;
2677     }
2678 
2679     /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2680     REQ(n, exec_stmt);
2681     expr1 = ast_for_expr(c, CHILD(n, 1));
2682     if (!expr1)
2683         return NULL;
2684 
2685     if (expr1->kind == Tuple_kind && n_children < 4 &&
2686         (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2687          asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2688         /* Backwards compatibility: passing exec args as a tuple */
2689         globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2690         if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2691             locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2692         }
2693         expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2694     }
2695 
2696     if (n_children >= 4) {
2697         globals = ast_for_expr(c, CHILD(n, 3));
2698         if (!globals)
2699             return NULL;
2700     }
2701     if (n_children == 6) {
2702         locals = ast_for_expr(c, CHILD(n, 5));
2703         if (!locals)
2704             return NULL;
2705     }
2706 
2707     return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2708                 c->c_arena);
2709 }
2710 
2711 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2712 ast_for_assert_stmt(struct compiling *c, const node *n)
2713 {
2714     /* assert_stmt: 'assert' test [',' test] */
2715     REQ(n, assert_stmt);
2716     if (NCH(n) == 2) {
2717         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2718         if (!expression)
2719             return NULL;
2720         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2721                       c->c_arena);
2722     }
2723     else if (NCH(n) == 4) {
2724         expr_ty expr1, expr2;
2725 
2726         expr1 = ast_for_expr(c, CHILD(n, 1));
2727         if (!expr1)
2728             return NULL;
2729         expr2 = ast_for_expr(c, CHILD(n, 3));
2730         if (!expr2)
2731             return NULL;
2732 
2733         return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2734     }
2735     PyErr_Format(PyExc_SystemError,
2736                  "improper number of parts to 'assert' statement: %d",
2737                  NCH(n));
2738     return NULL;
2739 }
2740 
2741 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2742 ast_for_suite(struct compiling *c, const node *n)
2743 {
2744     /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2745     asdl_seq *seq;
2746     stmt_ty s;
2747     int i, total, num, end, pos = 0;
2748     node *ch;
2749 
2750     REQ(n, suite);
2751 
2752     total = num_stmts(n);
2753     seq = asdl_seq_new(total, c->c_arena);
2754     if (!seq)
2755         return NULL;
2756     if (TYPE(CHILD(n, 0)) == simple_stmt) {
2757         n = CHILD(n, 0);
2758         /* simple_stmt always ends with a NEWLINE,
2759            and may have a trailing SEMI
2760         */
2761         end = NCH(n) - 1;
2762         if (TYPE(CHILD(n, end - 1)) == SEMI)
2763             end--;
2764         /* loop by 2 to skip semi-colons */
2765         for (i = 0; i < end; i += 2) {
2766             ch = CHILD(n, i);
2767             s = ast_for_stmt(c, ch);
2768             if (!s)
2769                 return NULL;
2770             asdl_seq_SET(seq, pos++, s);
2771         }
2772     }
2773     else {
2774         for (i = 2; i < (NCH(n) - 1); i++) {
2775             ch = CHILD(n, i);
2776             REQ(ch, stmt);
2777             num = num_stmts(ch);
2778             if (num == 1) {
2779                 /* small_stmt or compound_stmt with only one child */
2780                 s = ast_for_stmt(c, ch);
2781                 if (!s)
2782                     return NULL;
2783                 asdl_seq_SET(seq, pos++, s);
2784             }
2785             else {
2786                 int j;
2787                 ch = CHILD(ch, 0);
2788                 REQ(ch, simple_stmt);
2789                 for (j = 0; j < NCH(ch); j += 2) {
2790                     /* statement terminates with a semi-colon ';' */
2791                     if (NCH(CHILD(ch, j)) == 0) {
2792                         assert((j + 1) == NCH(ch));
2793                         break;
2794                     }
2795                     s = ast_for_stmt(c, CHILD(ch, j));
2796                     if (!s)
2797                         return NULL;
2798                     asdl_seq_SET(seq, pos++, s);
2799                 }
2800             }
2801         }
2802     }
2803     assert(pos == seq->size);
2804     return seq;
2805 }
2806 
2807 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)2808 ast_for_if_stmt(struct compiling *c, const node *n)
2809 {
2810     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2811        ['else' ':' suite]
2812     */
2813     char *s;
2814 
2815     REQ(n, if_stmt);
2816 
2817     if (NCH(n) == 4) {
2818         expr_ty expression;
2819         asdl_seq *suite_seq;
2820 
2821         expression = ast_for_expr(c, CHILD(n, 1));
2822         if (!expression)
2823             return NULL;
2824         suite_seq = ast_for_suite(c, CHILD(n, 3));
2825         if (!suite_seq)
2826             return NULL;
2827 
2828         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2829                   c->c_arena);
2830     }
2831 
2832     s = STR(CHILD(n, 4));
2833     /* s[2], the third character in the string, will be
2834        's' for el_s_e, or
2835        'i' for el_i_f
2836     */
2837     if (s[2] == 's') {
2838         expr_ty expression;
2839         asdl_seq *seq1, *seq2;
2840 
2841         expression = ast_for_expr(c, CHILD(n, 1));
2842         if (!expression)
2843             return NULL;
2844         seq1 = ast_for_suite(c, CHILD(n, 3));
2845         if (!seq1)
2846             return NULL;
2847         seq2 = ast_for_suite(c, CHILD(n, 6));
2848         if (!seq2)
2849             return NULL;
2850 
2851         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2852                   c->c_arena);
2853     }
2854     else if (s[2] == 'i') {
2855         int i, n_elif, has_else = 0;
2856         expr_ty expression;
2857         asdl_seq *suite_seq;
2858         asdl_seq *orelse = NULL;
2859         n_elif = NCH(n) - 4;
2860         /* must reference the child n_elif+1 since 'else' token is third,
2861            not fourth, child from the end. */
2862         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2863             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2864             has_else = 1;
2865             n_elif -= 3;
2866         }
2867         n_elif /= 4;
2868 
2869         if (has_else) {
2870             asdl_seq *suite_seq2;
2871 
2872             orelse = asdl_seq_new(1, c->c_arena);
2873             if (!orelse)
2874                 return NULL;
2875             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2876             if (!expression)
2877                 return NULL;
2878             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2879             if (!suite_seq)
2880                 return NULL;
2881             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2882             if (!suite_seq2)
2883                 return NULL;
2884 
2885             asdl_seq_SET(orelse, 0,
2886                          If(expression, suite_seq, suite_seq2,
2887                             LINENO(CHILD(n, NCH(n) - 6)),
2888                             CHILD(n, NCH(n) - 6)->n_col_offset,
2889                             c->c_arena));
2890             /* the just-created orelse handled the last elif */
2891             n_elif--;
2892         }
2893 
2894         for (i = 0; i < n_elif; i++) {
2895             int off = 5 + (n_elif - i - 1) * 4;
2896             asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2897             if (!newobj)
2898                 return NULL;
2899             expression = ast_for_expr(c, CHILD(n, off));
2900             if (!expression)
2901                 return NULL;
2902             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2903             if (!suite_seq)
2904                 return NULL;
2905 
2906             asdl_seq_SET(newobj, 0,
2907                          If(expression, suite_seq, orelse,
2908                             LINENO(CHILD(n, off)),
2909                             CHILD(n, off)->n_col_offset, c->c_arena));
2910             orelse = newobj;
2911         }
2912         expression = ast_for_expr(c, CHILD(n, 1));
2913         if (!expression)
2914             return NULL;
2915         suite_seq = ast_for_suite(c, CHILD(n, 3));
2916         if (!suite_seq)
2917             return NULL;
2918         return If(expression, suite_seq, orelse,
2919                   LINENO(n), n->n_col_offset, c->c_arena);
2920     }
2921 
2922     PyErr_Format(PyExc_SystemError,
2923                  "unexpected token in 'if' statement: %s", s);
2924     return NULL;
2925 }
2926 
2927 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)2928 ast_for_while_stmt(struct compiling *c, const node *n)
2929 {
2930     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2931     REQ(n, while_stmt);
2932 
2933     if (NCH(n) == 4) {
2934         expr_ty expression;
2935         asdl_seq *suite_seq;
2936 
2937         expression = ast_for_expr(c, CHILD(n, 1));
2938         if (!expression)
2939             return NULL;
2940         suite_seq = ast_for_suite(c, CHILD(n, 3));
2941         if (!suite_seq)
2942             return NULL;
2943         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2944                      c->c_arena);
2945     }
2946     else if (NCH(n) == 7) {
2947         expr_ty expression;
2948         asdl_seq *seq1, *seq2;
2949 
2950         expression = ast_for_expr(c, CHILD(n, 1));
2951         if (!expression)
2952             return NULL;
2953         seq1 = ast_for_suite(c, CHILD(n, 3));
2954         if (!seq1)
2955             return NULL;
2956         seq2 = ast_for_suite(c, CHILD(n, 6));
2957         if (!seq2)
2958             return NULL;
2959 
2960         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2961                      c->c_arena);
2962     }
2963 
2964     PyErr_Format(PyExc_SystemError,
2965                  "wrong number of tokens for 'while' statement: %d",
2966                  NCH(n));
2967     return NULL;
2968 }
2969 
2970 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)2971 ast_for_for_stmt(struct compiling *c, const node *n)
2972 {
2973     asdl_seq *_target, *seq = NULL, *suite_seq;
2974     expr_ty expression;
2975     expr_ty target, first;
2976     const node *node_target;
2977     /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2978     REQ(n, for_stmt);
2979 
2980     if (NCH(n) == 9) {
2981         seq = ast_for_suite(c, CHILD(n, 8));
2982         if (!seq)
2983             return NULL;
2984     }
2985 
2986     node_target = CHILD(n, 1);
2987     _target = ast_for_exprlist(c, node_target, Store);
2988     if (!_target)
2989         return NULL;
2990     /* Check the # of children rather than the length of _target, since
2991        for x, in ... has 1 element in _target, but still requires a Tuple. */
2992     first = (expr_ty)asdl_seq_GET(_target, 0);
2993     if (NCH(node_target) == 1)
2994         target = first;
2995     else
2996         target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
2997 
2998     expression = ast_for_testlist(c, CHILD(n, 3));
2999     if (!expression)
3000         return NULL;
3001     suite_seq = ast_for_suite(c, CHILD(n, 5));
3002     if (!suite_seq)
3003         return NULL;
3004 
3005     return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
3006                c->c_arena);
3007 }
3008 
3009 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3010 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3011 {
3012     /* except_clause: 'except' [test [(',' | 'as') test]] */
3013     REQ(exc, except_clause);
3014     REQ(body, suite);
3015 
3016     if (NCH(exc) == 1) {
3017         asdl_seq *suite_seq = ast_for_suite(c, body);
3018         if (!suite_seq)
3019             return NULL;
3020 
3021         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3022                              exc->n_col_offset, c->c_arena);
3023     }
3024     else if (NCH(exc) == 2) {
3025         expr_ty expression;
3026         asdl_seq *suite_seq;
3027 
3028         expression = ast_for_expr(c, CHILD(exc, 1));
3029         if (!expression)
3030             return NULL;
3031         suite_seq = ast_for_suite(c, body);
3032         if (!suite_seq)
3033             return NULL;
3034 
3035         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3036                              exc->n_col_offset, c->c_arena);
3037     }
3038     else if (NCH(exc) == 4) {
3039         asdl_seq *suite_seq;
3040         expr_ty expression;
3041         expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3042         if (!e)
3043             return NULL;
3044         if (!set_context(c, e, Store, CHILD(exc, 3)))
3045             return NULL;
3046         expression = ast_for_expr(c, CHILD(exc, 1));
3047         if (!expression)
3048             return NULL;
3049         suite_seq = ast_for_suite(c, body);
3050         if (!suite_seq)
3051             return NULL;
3052 
3053         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3054                              exc->n_col_offset, c->c_arena);
3055     }
3056 
3057     PyErr_Format(PyExc_SystemError,
3058                  "wrong number of children for 'except' clause: %d",
3059                  NCH(exc));
3060     return NULL;
3061 }
3062 
3063 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3064 ast_for_try_stmt(struct compiling *c, const node *n)
3065 {
3066     const int nch = NCH(n);
3067     int n_except = (nch - 3)/3;
3068     asdl_seq *body, *orelse = NULL, *finally = NULL;
3069 
3070     REQ(n, try_stmt);
3071 
3072     body = ast_for_suite(c, CHILD(n, 2));
3073     if (body == NULL)
3074         return NULL;
3075 
3076     if (TYPE(CHILD(n, nch - 3)) == NAME) {
3077         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3078             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3079                 /* we can assume it's an "else",
3080                    because nch >= 9 for try-else-finally and
3081                    it would otherwise have a type of except_clause */
3082                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3083                 if (orelse == NULL)
3084                     return NULL;
3085                 n_except--;
3086             }
3087 
3088             finally = ast_for_suite(c, CHILD(n, nch - 1));
3089             if (finally == NULL)
3090                 return NULL;
3091             n_except--;
3092         }
3093         else {
3094             /* we can assume it's an "else",
3095                otherwise it would have a type of except_clause */
3096             orelse = ast_for_suite(c, CHILD(n, nch - 1));
3097             if (orelse == NULL)
3098                 return NULL;
3099             n_except--;
3100         }
3101     }
3102     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3103         ast_error(n, "malformed 'try' statement");
3104         return NULL;
3105     }
3106 
3107     if (n_except > 0) {
3108         int i;
3109         stmt_ty except_st;
3110         /* process except statements to create a try ... except */
3111         asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
3112         if (handlers == NULL)
3113             return NULL;
3114 
3115         for (i = 0; i < n_except; i++) {
3116             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3117                                                        CHILD(n, 5 + i * 3));
3118             if (!e)
3119                 return NULL;
3120             asdl_seq_SET(handlers, i, e);
3121         }
3122 
3123         except_st = TryExcept(body, handlers, orelse, LINENO(n),
3124                               n->n_col_offset, c->c_arena);
3125         if (!finally)
3126             return except_st;
3127 
3128         /* if a 'finally' is present too, we nest the TryExcept within a
3129            TryFinally to emulate try ... except ... finally */
3130         body = asdl_seq_new(1, c->c_arena);
3131         if (body == NULL)
3132             return NULL;
3133         asdl_seq_SET(body, 0, except_st);
3134     }
3135 
3136     /* must be a try ... finally (except clauses are in body, if any exist) */
3137     assert(finally != NULL);
3138     return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
3139 }
3140 
3141 /* with_item: test ['as' expr] */
3142 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content)3143 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
3144 {
3145     expr_ty context_expr, optional_vars = NULL;
3146 
3147     REQ(n, with_item);
3148     context_expr = ast_for_expr(c, CHILD(n, 0));
3149     if (!context_expr)
3150         return NULL;
3151     if (NCH(n) == 3) {
3152         optional_vars = ast_for_expr(c, CHILD(n, 2));
3153 
3154         if (!optional_vars) {
3155             return NULL;
3156         }
3157         if (!set_context(c, optional_vars, Store, n)) {
3158             return NULL;
3159         }
3160     }
3161 
3162     return With(context_expr, optional_vars, content, LINENO(n),
3163                 n->n_col_offset, c->c_arena);
3164 }
3165 
3166 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3167 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3168 ast_for_with_stmt(struct compiling *c, const node *n)
3169 {
3170     int i;
3171     stmt_ty ret;
3172     asdl_seq *inner;
3173 
3174     REQ(n, with_stmt);
3175 
3176     /* process the with items inside-out */
3177     i = NCH(n) - 1;
3178     /* the suite of the innermost with item is the suite of the with stmt */
3179     inner = ast_for_suite(c, CHILD(n, i));
3180     if (!inner)
3181         return NULL;
3182 
3183     for (;;) {
3184         i -= 2;
3185         ret = ast_for_with_item(c, CHILD(n, i), inner);
3186         if (!ret)
3187             return NULL;
3188         /* was this the last item? */
3189         if (i == 1)
3190             break;
3191         /* if not, wrap the result so far in a new sequence */
3192         inner = asdl_seq_new(1, c->c_arena);
3193         if (!inner)
3194             return NULL;
3195         asdl_seq_SET(inner, 0, ret);
3196     }
3197 
3198     return ret;
3199 }
3200 
3201 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3202 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3203 {
3204     /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3205     PyObject *classname;
3206     asdl_seq *bases, *s;
3207 
3208     REQ(n, classdef);
3209 
3210     if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3211             return NULL;
3212 
3213     if (NCH(n) == 4) {
3214         s = ast_for_suite(c, CHILD(n, 3));
3215         if (!s)
3216             return NULL;
3217         classname = NEW_IDENTIFIER(CHILD(n, 1));
3218         if (!classname)
3219             return NULL;
3220         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3221                         n->n_col_offset, c->c_arena);
3222     }
3223     /* check for empty base list */
3224     if (TYPE(CHILD(n,3)) == RPAR) {
3225         s = ast_for_suite(c, CHILD(n,5));
3226         if (!s)
3227             return NULL;
3228         classname = NEW_IDENTIFIER(CHILD(n, 1));
3229         if (!classname)
3230             return NULL;
3231         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3232                         n->n_col_offset, c->c_arena);
3233     }
3234 
3235     /* else handle the base class list */
3236     bases = ast_for_class_bases(c, CHILD(n, 3));
3237     if (!bases)
3238         return NULL;
3239 
3240     s = ast_for_suite(c, CHILD(n, 6));
3241     if (!s)
3242         return NULL;
3243     classname = NEW_IDENTIFIER(CHILD(n, 1));
3244     if (!classname)
3245         return NULL;
3246     return ClassDef(classname, bases, s, decorator_seq,
3247                     LINENO(n), n->n_col_offset, c->c_arena);
3248 }
3249 
3250 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3251 ast_for_stmt(struct compiling *c, const node *n)
3252 {
3253     if (TYPE(n) == stmt) {
3254         assert(NCH(n) == 1);
3255         n = CHILD(n, 0);
3256     }
3257     if (TYPE(n) == simple_stmt) {
3258         assert(num_stmts(n) == 1);
3259         n = CHILD(n, 0);
3260     }
3261     if (TYPE(n) == small_stmt) {
3262         n = CHILD(n, 0);
3263         /* small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt
3264                      | flow_stmt | import_stmt | global_stmt | exec_stmt
3265                      | assert_stmt
3266         */
3267         switch (TYPE(n)) {
3268             case expr_stmt:
3269                 return ast_for_expr_stmt(c, n);
3270             case print_stmt:
3271                 return ast_for_print_stmt(c, n);
3272             case del_stmt:
3273                 return ast_for_del_stmt(c, n);
3274             case pass_stmt:
3275                 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3276             case flow_stmt:
3277                 return ast_for_flow_stmt(c, n);
3278             case import_stmt:
3279                 return ast_for_import_stmt(c, n);
3280             case global_stmt:
3281                 return ast_for_global_stmt(c, n);
3282             case exec_stmt:
3283                 return ast_for_exec_stmt(c, n);
3284             case assert_stmt:
3285                 return ast_for_assert_stmt(c, n);
3286             default:
3287                 PyErr_Format(PyExc_SystemError,
3288                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3289                              TYPE(n), NCH(n));
3290                 return NULL;
3291         }
3292     }
3293     else {
3294         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3295                         | funcdef | classdef | decorated
3296         */
3297         node *ch = CHILD(n, 0);
3298         REQ(n, compound_stmt);
3299         switch (TYPE(ch)) {
3300             case if_stmt:
3301                 return ast_for_if_stmt(c, ch);
3302             case while_stmt:
3303                 return ast_for_while_stmt(c, ch);
3304             case for_stmt:
3305                 return ast_for_for_stmt(c, ch);
3306             case try_stmt:
3307                 return ast_for_try_stmt(c, ch);
3308             case with_stmt:
3309                 return ast_for_with_stmt(c, ch);
3310             case funcdef:
3311                 return ast_for_funcdef(c, ch, NULL);
3312             case classdef:
3313                 return ast_for_classdef(c, ch, NULL);
3314             case decorated:
3315                 return ast_for_decorated(c, ch);
3316             default:
3317                 PyErr_Format(PyExc_SystemError,
3318                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3319                              TYPE(n), NCH(n));
3320                 return NULL;
3321         }
3322     }
3323 }
3324 
3325 static PyObject *
parsenumber(struct compiling * c,const char * s)3326 parsenumber(struct compiling *c, const char *s)
3327 {
3328         const char *end;
3329         long x;
3330         double dx;
3331 #ifndef WITHOUT_COMPLEX
3332         Py_complex complex;
3333         int imflag;
3334 #endif
3335 
3336         assert(s != NULL);
3337         errno = 0;
3338         end = s + strlen(s) - 1;
3339 #ifndef WITHOUT_COMPLEX
3340         imflag = *end == 'j' || *end == 'J';
3341 #endif
3342         if (*end == 'l' || *end == 'L')
3343                 return PyLong_FromString((char *)s, (char **)0, 0);
3344         x = PyOS_strtol((char *)s, (char **)&end, 0);
3345         if (*end == '\0') {
3346                 if (errno != 0)
3347                         return PyLong_FromString((char *)s, (char **)0, 0);
3348                 return PyInt_FromLong(x);
3349         }
3350         /* XXX Huge floats may silently fail */
3351 #ifndef WITHOUT_COMPLEX
3352         if (imflag) {
3353                 complex.real = 0.;
3354                 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3355                 if (complex.imag == -1.0 && PyErr_Occurred())
3356                         return NULL;
3357                 return PyComplex_FromCComplex(complex);
3358         }
3359         else
3360 #endif
3361         {
3362                 dx = PyOS_string_to_double(s, NULL, NULL);
3363                 if (dx == -1.0 && PyErr_Occurred())
3364                         return NULL;
3365                 return PyFloat_FromDouble(dx);
3366         }
3367 }
3368 
3369 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end,char * encoding)3370 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3371 {
3372 #ifndef Py_USING_UNICODE
3373         Py_FatalError("decode_utf8 should not be called in this build.");
3374         return NULL;
3375 #else
3376         PyObject *u, *v;
3377         char *s, *t;
3378         t = s = (char *)*sPtr;
3379         /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3380         while (s < end && (*s & 0x80)) s++;
3381         *sPtr = s;
3382         u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3383         if (u == NULL)
3384                 return NULL;
3385         v = PyUnicode_AsEncodedString(u, encoding, NULL);
3386         Py_DECREF(u);
3387         return v;
3388 #endif
3389 }
3390 
#ifdef Py_USING_UNICODE
/* Decode the body of a unicode string literal (quotes and prefix already
 * removed) into a unicode object.
 *
 * s/len is the literal text; rawmode selects raw-unicode-escape decoding
 * instead of unicode-escape decoding.  When encoding is non-NULL and is not
 * "iso-8859-1", the text is first rewritten into a scratch string in which
 * every run of high-bit bytes is replaced by \UXXXXXXXX escapes, so that the
 * escape decoders only see bytes without the high bit set.
 *
 * Returns a new reference, or NULL with an exception set on failure.
 */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
    PyObject *v;
    PyObject *u = NULL;
    char *buf;
    char *p;
    const char *end;

    if (encoding != NULL && strcmp(encoding, "iso-8859-1")) {
        /* The scratch buffer is len * 6 bytes; refuse lengths for which
           that product would overflow, and set an exception so that the
           caller does not see a bare NULL. */
        if (len > PY_SIZE_MAX / 6)
            return PyErr_NoMemory();
        /* Worst-case growth: a 2-byte UTF-8 character may become
           "\U000000E4" (10 bytes), or 1:5; a backslash followed by such a
           character (3 bytes) may become "\u005c\U000000E4" (16 bytes),
           or ~1:6 */
        u = PyString_FromStringAndSize((char *)NULL, len * 6);
        if (u == NULL)
            return NULL;
        p = buf = PyString_AsString(u);
        end = s + len;
        while (s < end) {
            if (*s == '\\') {
                *p++ = *s++;
                /* A backslash directly before a high-bit byte is completed
                   to the escape \u005c (a literal backslash), keeping it
                   separate from the \U escape emitted next. */
                if (*s & 0x80) {
                    strcpy(p, "u005c");
                    p += 5;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                char *r;
                Py_ssize_t rn, i;
                /* Re-encode the high-bit run as UTF-32-BE: four bytes per
                   code point, each group printed as one \U escape. */
                w = decode_utf8(c, &s, end, "utf-32-be");
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                r = PyString_AsString(w);
                rn = PyString_Size(w);
                assert(rn % 4 == 0);
                for (i = 0; i < rn; i += 4) {
                    sprintf(p, "\\U%02x%02x%02x%02x",
                            r[i + 0] & 0xFF,
                            r[i + 1] & 0xFF,
                            r[i + 2] & 0xFF,
                            r[i + 3] & 0xFF);
                    p += 10;
                }
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
        /* From here on decode the rewritten text instead of the input. */
        len = p - buf;
        s = buf;
    }
    if (rawmode)
        v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
    else
        v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    /* u owns the scratch buffer (if one was used); release it now. */
    Py_XDECREF(u);
    return v;
}
#endif
3455 
3456 /* s is a Python string literal, including the bracketing quote characters,
3457  * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3458  * parsestr parses it, and returns the decoded Python string object.
3459  */
3460 static PyObject *
parsestr(struct compiling * c,const node * n,const char * s)3461 parsestr(struct compiling *c, const node *n, const char *s)
3462 {
3463         size_t len, i;
3464         int quote = Py_CHARMASK(*s);
3465         int rawmode = 0;
3466         int need_encoding;
3467         int unicode = c->c_future_unicode;
3468         int bytes = 0;
3469 
3470         if (isalpha(quote) || quote == '_') {
3471                 if (quote == 'u' || quote == 'U') {
3472                         quote = *++s;
3473                         unicode = 1;
3474                 }
3475                 if (quote == 'b' || quote == 'B') {
3476                         quote = *++s;
3477                         unicode = 0;
3478                         bytes = 1;
3479                 }
3480                 if (quote == 'r' || quote == 'R') {
3481                         quote = *++s;
3482                         rawmode = 1;
3483                 }
3484         }
3485         if (quote != '\'' && quote != '\"') {
3486                 PyErr_BadInternalCall();
3487                 return NULL;
3488         }
3489         s++;
3490         len = strlen(s);
3491         if (len > INT_MAX) {
3492                 PyErr_SetString(PyExc_OverflowError,
3493                                 "string to parse is too long");
3494                 return NULL;
3495         }
3496         if (s[--len] != quote) {
3497                 PyErr_BadInternalCall();
3498                 return NULL;
3499         }
3500         if (len >= 4 && s[0] == quote && s[1] == quote) {
3501                 s += 2;
3502                 len -= 2;
3503                 if (s[--len] != quote || s[--len] != quote) {
3504                         PyErr_BadInternalCall();
3505                         return NULL;
3506                 }
3507         }
3508         if (Py_Py3kWarningFlag && bytes) {
3509             for (i = 0; i < len; i++) {
3510                 if ((unsigned char)s[i] > 127) {
3511                     if (!ast_warn(c, n,
3512                         "non-ascii bytes literals not supported in 3.x"))
3513                         return NULL;
3514                     break;
3515                 }
3516             }
3517         }
3518 #ifdef Py_USING_UNICODE
3519         if (unicode || Py_UnicodeFlag) {
3520                 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3521         }
3522 #endif
3523         need_encoding = (c->c_encoding != NULL &&
3524                          strcmp(c->c_encoding, "utf-8") != 0 &&
3525                          strcmp(c->c_encoding, "iso-8859-1") != 0);
3526         if (rawmode || strchr(s, '\\') == NULL) {
3527                 if (need_encoding) {
3528 #ifndef Py_USING_UNICODE
3529                         /* This should not happen - we never see any other
3530                            encoding. */
3531                         Py_FatalError(
3532                             "cannot deal with encodings in this build.");
3533 #else
3534                         PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3535                         if (u == NULL)
3536                                 return NULL;
3537                         v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3538                         Py_DECREF(u);
3539                         return v;
3540 #endif
3541                 } else {
3542                         return PyString_FromStringAndSize(s, len);
3543                 }
3544         }
3545 
3546         return PyString_DecodeEscape(s, len, NULL, unicode,
3547                                      need_encoding ? c->c_encoding : NULL);
3548 }
3549 
3550 /* Build a Python string object out of a STRING atom.  This takes care of
3551  * compile-time literal catenation, calling parsestr() on each piece, and
3552  * pasting the intermediate results together.
3553  */
3554 static PyObject *
parsestrplus(struct compiling * c,const node * n)3555 parsestrplus(struct compiling *c, const node *n)
3556 {
3557         PyObject *v;
3558         int i;
3559         REQ(CHILD(n, 0), STRING);
3560         if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3561                 /* String literal concatenation */
3562                 for (i = 1; i < NCH(n); i++) {
3563                         PyObject *s;
3564                         s = parsestr(c, n, STR(CHILD(n, i)));
3565                         if (s == NULL)
3566                                 goto onError;
3567                         if (PyString_Check(v) && PyString_Check(s)) {
3568                                 PyString_ConcatAndDel(&v, s);
3569                                 if (v == NULL)
3570                                     goto onError;
3571                         }
3572 #ifdef Py_USING_UNICODE
3573                         else {
3574                                 PyObject *temp = PyUnicode_Concat(v, s);
3575                                 Py_DECREF(s);
3576                                 Py_DECREF(v);
3577                                 v = temp;
3578                                 if (v == NULL)
3579                                     goto onError;
3580                         }
3581 #endif
3582                 }
3583         }
3584         return v;
3585 
3586  onError:
3587         Py_XDECREF(v);
3588         return NULL;
3589 }
3590