1 /* Peephole optimizations for bytecode compiler. */
2 
3 #include "Python.h"
4 
5 #include "Python-ast.h"
6 #include "node.h"
7 #include "ast.h"
8 #include "code.h"
9 #include "symtable.h"
10 #include "opcode.h"
11 #include "wordcode_helpers.h"
12 
/* Opcode-classification predicates.  Each takes a bare opcode (already
   extracted with _Py_OPCODE), not a full code unit. */
#define UNCONDITIONAL_JUMP(op)  (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
/* Jump target of the instruction at index i, as a code-unit index:
   absolute jumps encode a byte offset from the start of the code,
   relative jumps a byte offset from the following instruction. */
#define GETJUMPTGT(arr, i) (get_arg(arr, i) / sizeof(_Py_CODEUNIT) + \
        (ABSOLUTE_JUMP(_Py_OPCODE(arr[i])) ? 0 : i+1))
/* True when [start, end] lie in the same basic block (see markblocks). */
#define ISBASICBLOCK(blocks, start, end) \
    (blocks[start]==blocks[end])


/* The CONST_STACK_* macros maintain a stack of *borrowed* references to
   the constants pushed by the most recent run of LOAD_CONST
   instructions.  They expand inside PyCode_Optimize and rely on its
   locals (const_stack, const_stack_top, const_stack_size, in_consts)
   and on its exitError label. */

/* NOTE(review): unlike the other macros this one is a bare `{...}`
   rather than do{...}while(0); fine at its single call site, but it
   would mis-parse after an unbraced `if`. */
#define CONST_STACK_CREATE() { \
    const_stack_size = 256; \
    const_stack = PyMem_New(PyObject *, const_stack_size); \
    if (!const_stack) { \
        PyErr_NoMemory(); \
        goto exitError; \
    } \
    }

/* The NULL guard is redundant (PyMem_Free(NULL) is a no-op) but harmless. */
#define CONST_STACK_DELETE() do { \
    if (const_stack) \
        PyMem_Free(const_stack); \
    } while(0)

#define CONST_STACK_LEN() ((unsigned)(const_stack_top + 1))

/* Push the constant loaded by the LOAD_CONST at codestr[i], growing the
   stack when full.  Sets in_consts so the main loop knows the current
   LOAD_CONST run is still alive. */
#define CONST_STACK_PUSH_OP(i) do { \
    PyObject *_x; \
    assert(_Py_OPCODE(codestr[i]) == LOAD_CONST); \
    assert(PyList_GET_SIZE(consts) > (Py_ssize_t)get_arg(codestr, i)); \
    _x = PyList_GET_ITEM(consts, get_arg(codestr, i)); \
    if (++const_stack_top >= const_stack_size) { \
        const_stack_size *= 2; \
        PyMem_Resize(const_stack, PyObject *, const_stack_size); \
        if (!const_stack) { \
            PyErr_NoMemory(); \
            goto exitError; \
        } \
    } \
    const_stack[const_stack_top] = _x; \
    in_consts = 1; \
    } while(0)

/* Forget all tracked constants (references are borrowed, nothing to free). */
#define CONST_STACK_RESET() do { \
    const_stack_top = -1; \
    } while(0)

/* Pointer to the i-th-from-top group of constants (oldest first). */
#define CONST_STACK_LASTN(i) \
    &const_stack[CONST_STACK_LEN() - i]

#define CONST_STACK_POP(i) do { \
    assert(CONST_STACK_LEN() >= i); \
    const_stack_top -= i; \
    } while(0)
70 
71 /* Scans back N consecutive LOAD_CONST instructions, skipping NOPs,
72    returns index of the Nth last's LOAD_CONST's EXTENDED_ARG prefix.
73    Callers are responsible to check CONST_STACK_LEN beforehand.
74 */
75 static Py_ssize_t
lastn_const_start(const _Py_CODEUNIT * codestr,Py_ssize_t i,Py_ssize_t n)76 lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n)
77 {
78     assert(n > 0);
79     for (;;) {
80         i--;
81         assert(i >= 0);
82         if (_Py_OPCODE(codestr[i]) == LOAD_CONST) {
83             if (!--n) {
84                 while (i > 0 && _Py_OPCODE(codestr[i-1]) == EXTENDED_ARG) {
85                     i--;
86                 }
87                 return i;
88             }
89         }
90         else {
91             assert(_Py_OPCODE(codestr[i]) == NOP ||
92                    _Py_OPCODE(codestr[i]) == EXTENDED_ARG);
93         }
94     }
95 }
96 
97 /* Scans through EXTENDED ARGs, seeking the index of the effective opcode */
98 static Py_ssize_t
find_op(const _Py_CODEUNIT * codestr,Py_ssize_t i)99 find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i)
100 {
101     while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
102         i++;
103     }
104     return i;
105 }
106 
107 /* Given the index of the effective opcode,
108    scan back to construct the oparg with EXTENDED_ARG */
109 static unsigned int
get_arg(const _Py_CODEUNIT * codestr,Py_ssize_t i)110 get_arg(const _Py_CODEUNIT *codestr, Py_ssize_t i)
111 {
112     _Py_CODEUNIT word;
113     unsigned int oparg = _Py_OPARG(codestr[i]);
114     if (i >= 1 && _Py_OPCODE(word = codestr[i-1]) == EXTENDED_ARG) {
115         oparg |= _Py_OPARG(word) << 8;
116         if (i >= 2 && _Py_OPCODE(word = codestr[i-2]) == EXTENDED_ARG) {
117             oparg |= _Py_OPARG(word) << 16;
118             if (i >= 3 && _Py_OPCODE(word = codestr[i-3]) == EXTENDED_ARG) {
119                 oparg |= _Py_OPARG(word) << 24;
120             }
121         }
122     }
123     return oparg;
124 }
125 
126 /* Fill the region with NOPs. */
127 static void
fill_nops(_Py_CODEUNIT * codestr,Py_ssize_t start,Py_ssize_t end)128 fill_nops(_Py_CODEUNIT *codestr, Py_ssize_t start, Py_ssize_t end)
129 {
130     memset(codestr + start, NOP, (end - start) * sizeof(_Py_CODEUNIT));
131 }
132 
133 /* Given the index of the effective opcode,
134    attempt to replace the argument, taking into account EXTENDED_ARG.
135    Returns -1 on failure, or the new op index on success */
136 static Py_ssize_t
set_arg(_Py_CODEUNIT * codestr,Py_ssize_t i,unsigned int oparg)137 set_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned int oparg)
138 {
139     unsigned int curarg = get_arg(codestr, i);
140     int curilen, newilen;
141     if (curarg == oparg)
142         return i;
143     curilen = instrsize(curarg);
144     newilen = instrsize(oparg);
145     if (curilen < newilen) {
146         return -1;
147     }
148 
149     write_op_arg(codestr + i + 1 - curilen, _Py_OPCODE(codestr[i]), oparg, newilen);
150     fill_nops(codestr, i + 1 - curilen + newilen, i + 1);
151     return i-curilen+newilen;
152 }
153 
154 /* Attempt to write op/arg at end of specified region of memory.
155    Preceding memory in the region is overwritten with NOPs.
156    Returns -1 on failure, op index on success */
157 static Py_ssize_t
copy_op_arg(_Py_CODEUNIT * codestr,Py_ssize_t i,unsigned char op,unsigned int oparg,Py_ssize_t maxi)158 copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,
159             unsigned int oparg, Py_ssize_t maxi)
160 {
161     int ilen = instrsize(oparg);
162     if (i + ilen > maxi) {
163         return -1;
164     }
165     write_op_arg(codestr + maxi - ilen, op, oparg, ilen);
166     fill_nops(codestr, i, maxi - ilen);
167     return maxi - 1;
168 }
169 
170 /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
171    with    LOAD_CONST (c1, c2, ... cn).
172    The consts table must still be in list form so that the
173    new constant (c1, c2, ... cn) can be appended.
174    Called with codestr pointing to the first LOAD_CONST.
175    Bails out with no change if one or more of the LOAD_CONSTs is missing.
176    Also works for BUILD_LIST and BUILT_SET when followed by an "in" or "not in"
177    test; for BUILD_SET it assembles a frozenset rather than a tuple.
178 */
179 static Py_ssize_t
fold_tuple_on_constants(_Py_CODEUNIT * codestr,Py_ssize_t c_start,Py_ssize_t opcode_end,unsigned char opcode,PyObject * consts,PyObject ** objs,int n)180 fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
181                         Py_ssize_t opcode_end, unsigned char opcode,
182                         PyObject *consts, PyObject **objs, int n)
183 {
184     PyObject *newconst, *constant;
185     Py_ssize_t i, len_consts;
186 
187     /* Pre-conditions */
188     assert(PyList_CheckExact(consts));
189 
190     /* Buildup new tuple of constants */
191     newconst = PyTuple_New(n);
192     if (newconst == NULL) {
193         return -1;
194     }
195     for (i=0 ; i<n ; i++) {
196         constant = objs[i];
197         Py_INCREF(constant);
198         PyTuple_SET_ITEM(newconst, i, constant);
199     }
200 
201     /* If it's a BUILD_SET, use the PyTuple we just built to create a
202        PyFrozenSet, and use that as the constant instead: */
203     if (opcode == BUILD_SET) {
204         Py_SETREF(newconst, PyFrozenSet_New(newconst));
205         if (newconst == NULL) {
206             return -1;
207         }
208     }
209 
210     /* Append folded constant onto consts */
211     len_consts = PyList_GET_SIZE(consts);
212     if (PyList_Append(consts, newconst)) {
213         Py_DECREF(newconst);
214         return -1;
215     }
216     Py_DECREF(newconst);
217 
218     return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
219 }
220 
/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with    LOAD_CONST binop(c1,c2)
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the BINOP; objs holds borrowed
   references to the two operands (objs[0] OP objs[1]).
   Abandons the transformation if the folding fails (i.e.  1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value.  That keeps pyc files from
   becoming large in the presence of code like:  (None,)*1000.
   Returns the index of the emitted LOAD_CONST, or -1 on failure
   (in which case the bytecode is left untouched).
*/
static Py_ssize_t
fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                         Py_ssize_t opcode_end, unsigned char opcode,
                         PyObject *consts, PyObject **objs)
{
    PyObject *newconst, *v, *w;
    Py_ssize_t len_consts, size;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    /* Captured before the append below: it becomes the new constant's
       index in the consts list. */
    len_consts = PyList_GET_SIZE(consts);

    /* Create new constant */
    v = objs[0];
    w = objs[1];
    switch (opcode) {
        case BINARY_POWER:
            newconst = PyNumber_Power(v, w, Py_None);
            break;
        case BINARY_MULTIPLY:
            newconst = PyNumber_Multiply(v, w);
            break;
        case BINARY_TRUE_DIVIDE:
            newconst = PyNumber_TrueDivide(v, w);
            break;
        case BINARY_FLOOR_DIVIDE:
            newconst = PyNumber_FloorDivide(v, w);
            break;
        case BINARY_MODULO:
            newconst = PyNumber_Remainder(v, w);
            break;
        case BINARY_ADD:
            newconst = PyNumber_Add(v, w);
            break;
        case BINARY_SUBTRACT:
            newconst = PyNumber_Subtract(v, w);
            break;
        case BINARY_SUBSCR:
            newconst = PyObject_GetItem(v, w);
            break;
        case BINARY_LSHIFT:
            newconst = PyNumber_Lshift(v, w);
            break;
        case BINARY_RSHIFT:
            newconst = PyNumber_Rshift(v, w);
            break;
        case BINARY_AND:
            newconst = PyNumber_And(v, w);
            break;
        case BINARY_XOR:
            newconst = PyNumber_Xor(v, w);
            break;
        case BINARY_OR:
            newconst = PyNumber_Or(v, w);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                 "unexpected binary operation %d on a constant",
                     opcode);
            return -1;
    }
    if (newconst == NULL) {
        /* Folding failed at compile time (e.g. 1 + 'a'): swallow the
           error and leave the operation to fail at run time, but keep a
           pending KeyboardInterrupt. */
        if(!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            PyErr_Clear();
        }
        return -1;
    }
    /* Refuse to fold large sequences (keeps pyc files small); objects
       without a size (numbers) raise TypeError, which is cleared. */
    size = PyObject_Size(newconst);
    if (size == -1) {
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            return -1;
        }
        PyErr_Clear();
    } else if (size > 20) {
        Py_DECREF(newconst);
        return -1;
    }

    /* Append folded constant into consts table */
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return -1;
    }
    Py_DECREF(newconst);

    return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
}
319 
320 static Py_ssize_t
fold_unaryops_on_constants(_Py_CODEUNIT * codestr,Py_ssize_t c_start,Py_ssize_t opcode_end,unsigned char opcode,PyObject * consts,PyObject * v)321 fold_unaryops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
322                            Py_ssize_t opcode_end, unsigned char opcode,
323                            PyObject *consts, PyObject *v)
324 {
325     PyObject *newconst;
326     Py_ssize_t len_consts;
327 
328     /* Pre-conditions */
329     assert(PyList_CheckExact(consts));
330     len_consts = PyList_GET_SIZE(consts);
331 
332     /* Create new constant */
333     switch (opcode) {
334         case UNARY_NEGATIVE:
335             newconst = PyNumber_Negative(v);
336             break;
337         case UNARY_INVERT:
338             newconst = PyNumber_Invert(v);
339             break;
340         case UNARY_POSITIVE:
341             newconst = PyNumber_Positive(v);
342             break;
343         default:
344             /* Called with an unknown opcode */
345             PyErr_Format(PyExc_SystemError,
346                  "unexpected unary operation %d on a constant",
347                      opcode);
348             return -1;
349     }
350     if (newconst == NULL) {
351         if(!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
352             PyErr_Clear();
353         }
354         return -1;
355     }
356 
357     /* Append folded constant into consts table */
358     if (PyList_Append(consts, newconst)) {
359         Py_DECREF(newconst);
360         PyErr_Clear();
361         return -1;
362     }
363     Py_DECREF(newconst);
364 
365     return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
366 }
367 
368 static unsigned int *
markblocks(_Py_CODEUNIT * code,Py_ssize_t len)369 markblocks(_Py_CODEUNIT *code, Py_ssize_t len)
370 {
371     unsigned int *blocks = PyMem_New(unsigned int, len);
372     int i, j, opcode, blockcnt = 0;
373 
374     if (blocks == NULL) {
375         PyErr_NoMemory();
376         return NULL;
377     }
378     memset(blocks, 0, len*sizeof(int));
379 
380     /* Mark labels in the first pass */
381     for (i = 0; i < len; i++) {
382         opcode = _Py_OPCODE(code[i]);
383         switch (opcode) {
384             case FOR_ITER:
385             case JUMP_FORWARD:
386             case JUMP_IF_FALSE_OR_POP:
387             case JUMP_IF_TRUE_OR_POP:
388             case POP_JUMP_IF_FALSE:
389             case POP_JUMP_IF_TRUE:
390             case JUMP_ABSOLUTE:
391             case CONTINUE_LOOP:
392             case SETUP_LOOP:
393             case SETUP_EXCEPT:
394             case SETUP_FINALLY:
395             case SETUP_WITH:
396             case SETUP_ASYNC_WITH:
397                 j = GETJUMPTGT(code, i);
398                 assert(j < len);
399                 blocks[j] = 1;
400                 break;
401         }
402     }
403     /* Build block numbers in the second pass */
404     for (i = 0; i < len; i++) {
405         blockcnt += blocks[i];          /* increment blockcnt over labels */
406         blocks[i] = blockcnt;
407     }
408     return blocks;
409 }
410 
/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails when the lineno table has complex
   encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass.

   Returns a new reference to the optimized bytecode object (possibly the
   original 'code' object, incref'd, when optimization is skipped), or
   NULL with an exception set on error.  'lnotab_obj' is modified in
   place (its refcount is asserted to be 1).
   NOTE(review): 'names' is part of the public signature but is never
   consulted below. */

PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lnotab_obj)
{
    Py_ssize_t h, i, nexti, op_start, codelen, tgt;
    unsigned int j, nops;
    unsigned char opcode, nextop;
    _Py_CODEUNIT *codestr = NULL;
    unsigned char *lnotab;
    unsigned int cum_orig_offset, last_offset;
    Py_ssize_t tabsiz;
    /* Borrowed references to the most recent run of LOAD_CONST values;
       managed by the CONST_STACK_* macros above. */
    PyObject **const_stack = NULL;
    Py_ssize_t const_stack_top = -1;
    Py_ssize_t const_stack_size = 0;
    int in_consts = 0;  /* whether we are in a LOAD_CONST sequence */
    unsigned int *blocks = NULL;

    /* Bail out if an exception is set */
    if (PyErr_Occurred())
        goto exitError;

    /* Bypass optimization when the lnotab table is too complex */
    assert(PyBytes_Check(lnotab_obj));
    lnotab = (unsigned char*)PyBytes_AS_STRING(lnotab_obj);
    tabsiz = PyBytes_GET_SIZE(lnotab_obj);
    assert(tabsiz == 0 || Py_REFCNT(lnotab_obj) == 1);
    if (memchr(lnotab, 255, tabsiz) != NULL) {
        /* 255 values are used for multibyte bytecode instructions */
        goto exitUnchanged;
    }
    /* Note: -128 and 127 special values for line number delta are ok,
       the peephole optimizer doesn't modify line numbers. */

    assert(PyBytes_Check(code));
    codelen = PyBytes_GET_SIZE(code);
    assert(codelen % sizeof(_Py_CODEUNIT) == 0);

    /* Make a modifiable copy of the code string */
    codestr = (_Py_CODEUNIT *)PyMem_Malloc(codelen);
    if (codestr == NULL) {
        PyErr_NoMemory();
        goto exitError;
    }
    memcpy(codestr, PyBytes_AS_STRING(code), codelen);
    /* From here on, codelen counts code units, not bytes. */
    codelen /= sizeof(_Py_CODEUNIT);

    blocks = markblocks(codestr, codelen);
    if (blocks == NULL)
        goto exitError;
    assert(PyList_Check(consts));

    CONST_STACK_CREATE();

    /* Main peephole pass.  i is the effective opcode index; op_start
       backs up over its EXTENDED_ARG prefix; nexti is the next
       effective opcode. */
    for (i=find_op(codestr, 0) ; i<codelen ; i=nexti) {
        opcode = _Py_OPCODE(codestr[i]);
        op_start = i;
        while (op_start >= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) {
            op_start--;
        }

        nexti = i + 1;
        while (nexti < codelen && _Py_OPCODE(codestr[nexti]) == EXTENDED_ARG)
            nexti++;
        nextop = nexti < codelen ? _Py_OPCODE(codestr[nexti]) : 0;

        /* Any instruction other than LOAD_CONST breaks the tracked
           constant run (CONST_STACK_PUSH_OP re-sets in_consts). */
        if (!in_consts) {
            CONST_STACK_RESET();
        }
        in_consts = 0;

        switch (opcode) {
            /* Replace UNARY_NOT POP_JUMP_IF_FALSE
               with    POP_JUMP_IF_TRUE */
            case UNARY_NOT:
                if (nextop != POP_JUMP_IF_FALSE
                    || !ISBASICBLOCK(blocks, op_start, i + 1))
                    break;
                fill_nops(codestr, op_start, i + 1);
                codestr[nexti] = PACKOPARG(POP_JUMP_IF_TRUE, _Py_OPARG(codestr[nexti]));
                break;

                /* not a is b -->  a is not b
                   not a in b -->  a not in b
                   not a is not b -->  a is b
                   not a not in b -->  a in b
                */
            case COMPARE_OP:
                /* cmp_op values 6..9 are is/is not/in/not in; each pairs
                   with its negation via the low bit (j^1). */
                j = get_arg(codestr, i);
                if (j < 6 || j > 9 ||
                    nextop != UNARY_NOT ||
                    !ISBASICBLOCK(blocks, op_start, i + 1))
                    break;
                codestr[i] = PACKOPARG(opcode, j^1);
                fill_nops(codestr, i + 1, nexti + 1);
                break;

                /* Skip over LOAD_CONST trueconst
                   POP_JUMP_IF_FALSE xx.  This improves
                   "while 1" performance.  */
            case LOAD_CONST:
                CONST_STACK_PUSH_OP(i);
                if (nextop != POP_JUMP_IF_FALSE  ||
                    !ISBASICBLOCK(blocks, op_start, i + 1)  ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i))))
                    break;
                fill_nops(codestr, op_start, nexti + 1);
                CONST_STACK_POP(1);
                break;

                /* Try to fold tuples of constants (includes a case for lists
                   and sets which are only used for "in" and "not in" tests).
                   Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
                   Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
                   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
            case BUILD_TUPLE:
            case BUILD_LIST:
            case BUILD_SET:
                j = get_arg(codestr, i);
                if (j > 0 && CONST_STACK_LEN() >= j) {
                    h = lastn_const_start(codestr, op_start, j);
                    if ((opcode == BUILD_TUPLE &&
                          ISBASICBLOCK(blocks, h, op_start)) ||
                         ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                          ((nextop==COMPARE_OP &&
                          (_Py_OPARG(codestr[nexti]) == PyCmp_IN ||
                           _Py_OPARG(codestr[nexti]) == PyCmp_NOT_IN)) ||
                          nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 1))) {
                        h = fold_tuple_on_constants(codestr, h, i + 1, opcode,
                                                    consts, CONST_STACK_LASTN(j), j);
                        if (h >= 0) {
                            CONST_STACK_POP(j);
                            CONST_STACK_PUSH_OP(h);
                        }
                        break;
                    }
                }
                if (nextop != UNPACK_SEQUENCE  ||
                    !ISBASICBLOCK(blocks, op_start, i + 1) ||
                    j != get_arg(codestr, nexti) ||
                    opcode == BUILD_SET)
                    break;
                if (j < 2) {
                    /* BUILD x1, UNPACK x1 is a no-op pair. */
                    fill_nops(codestr, op_start, nexti + 1);
                } else if (j == 2) {
                    codestr[op_start] = PACKOPARG(ROT_TWO, 0);
                    fill_nops(codestr, op_start + 1, nexti + 1);
                    CONST_STACK_RESET();
                } else if (j == 3) {
                    codestr[op_start] = PACKOPARG(ROT_THREE, 0);
                    codestr[op_start + 1] = PACKOPARG(ROT_TWO, 0);
                    fill_nops(codestr, op_start + 2, nexti + 1);
                    CONST_STACK_RESET();
                }
                break;

                /* Fold binary ops on constants.
                   LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
            case BINARY_POWER:
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_MODULO:
            case BINARY_ADD:
            case BINARY_SUBTRACT:
            case BINARY_SUBSCR:
            case BINARY_LSHIFT:
            case BINARY_RSHIFT:
            case BINARY_AND:
            case BINARY_XOR:
            case BINARY_OR:
                if (CONST_STACK_LEN() < 2)
                    break;
                h = lastn_const_start(codestr, op_start, 2);
                if (ISBASICBLOCK(blocks, h, op_start)) {
                    h = fold_binops_on_constants(codestr, h, i + 1, opcode,
                                                 consts, CONST_STACK_LASTN(2));
                    if (h >= 0) {
                        CONST_STACK_POP(2);
                        CONST_STACK_PUSH_OP(h);
                    }
                }
                break;

                /* Fold unary ops on constants.
                   LOAD_CONST c1  UNARY_OP --> LOAD_CONST unary_op(c) */
            case UNARY_NEGATIVE:
            case UNARY_INVERT:
            case UNARY_POSITIVE:
                if (CONST_STACK_LEN() < 1)
                    break;
                h = lastn_const_start(codestr, op_start, 1);
                if (ISBASICBLOCK(blocks, h, op_start)) {
                    h = fold_unaryops_on_constants(codestr, h, i + 1, opcode,
                                                   consts, *CONST_STACK_LASTN(1));
                    if (h >= 0) {
                        CONST_STACK_POP(1);
                        CONST_STACK_PUSH_OP(h);
                    }
                }
                break;

                /* Simplify conditional jump to conditional jump where the
                   result of the first test implies the success of a similar
                   test or the failure of the opposite test.
                   Arises in code like:
                   "if a and b:"
                   "if a or b:"
                   "a and b or c"
                   "(a and b) and c"
                   x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                      -->  x:JUMP_IF_FALSE_OR_POP z
                   x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                      -->  x:POP_JUMP_IF_FALSE y+1
                   where y+1 is the instruction following the second test.
                */
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT);
                tgt = find_op(codestr, h);

                j = _Py_OPCODE(codestr[tgt]);
                if (CONDITIONAL_JUMP(j)) {
                    /* NOTE: all possible jumps here are absolute. */
                    if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
                        /* The second jump will be taken iff the first is.
                           The current opcode inherits its target's
                           stack effect */
                        h = set_arg(codestr, i, get_arg(codestr, tgt));
                    } else {
                        /* The second jump is not taken if the first is (so
                           jump past it), and all conditional jumps pop their
                           argument when they're not taken (so change the
                           first jump to pop its argument when it's taken). */
                        h = set_arg(codestr, i, (tgt + 1) * sizeof(_Py_CODEUNIT));
                        j = opcode == JUMP_IF_TRUE_OR_POP ?
                            POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE;
                    }

                    if (h >= 0) {
                        nexti = h;
                        codestr[nexti] = PACKOPARG(j, _Py_OPARG(codestr[nexti]));
                        break;
                    }
                }
                /* Intentional fallthrough */

                /* Replace jumps to unconditional jumps */
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                h = GETJUMPTGT(codestr, i);
                tgt = find_op(codestr, h);
                /* Replace JUMP_* to a RETURN into just a RETURN */
                if (UNCONDITIONAL_JUMP(opcode) &&
                    _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) {
                    codestr[op_start] = PACKOPARG(RETURN_VALUE, 0);
                    fill_nops(codestr, op_start + 1, i + 1);
                } else if (UNCONDITIONAL_JUMP(_Py_OPCODE(codestr[tgt]))) {
                    j = GETJUMPTGT(codestr, tgt);
                    if (opcode == JUMP_FORWARD) { /* JMP_ABS can go backwards */
                        opcode = JUMP_ABSOLUTE;
                    } else if (!ABSOLUTE_JUMP(opcode)) {
                        if ((Py_ssize_t)j < i + 1) {
                            break;           /* No backward relative jumps */
                        }
                        j -= i + 1;          /* Calc relative jump addr */
                    }
                    j *= sizeof(_Py_CODEUNIT);
                    copy_op_arg(codestr, op_start, opcode, j, i + 1);
                }
                break;

                /* Remove unreachable ops after RETURN */
            case RETURN_VALUE:
                h = i + 1;
                while (h < codelen && ISBASICBLOCK(blocks, i, h)) {
                    h++;
                }
                if (h > i + 1) {
                    fill_nops(codestr, i + 1, h);
                    nexti = find_op(codestr, h);
                }
                break;
        }
    }

    /* Fixup lnotab */
    /* The blocks array is reused here as an old-offset -> new-offset
       map (in code units), anticipating the NOP removal below. */
    for (i = 0, nops = 0; i < codelen; i++) {
        assert(i - nops <= INT_MAX);
        /* original code offset => new code offset */
        blocks[i] = i - nops;
        if (_Py_OPCODE(codestr[i]) == NOP)
            nops++;
    }
    cum_orig_offset = 0;
    last_offset = 0;
    for (i=0 ; i < tabsiz ; i+=2) {
        unsigned int offset_delta, new_offset;
        cum_orig_offset += lnotab[i];
        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0);
        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] *
                sizeof(_Py_CODEUNIT);
        offset_delta = new_offset - last_offset;
        assert(offset_delta <= 255);
        lnotab[i] = (unsigned char)offset_delta;
        last_offset = new_offset;
    }

    /* Remove NOPs and fixup jump targets */
    /* Compacting pass: h is the write index, i the read index; jump
       arguments are remapped through blocks[].  EXTENDED_ARG runs are
       re-emitted padded with EXTENDED_ARG 0 so instruction boundaries
       (and thus jump targets) never shift forward. */
    for (op_start = i = h = 0; i < codelen; i++, op_start = i) {
        j = _Py_OPARG(codestr[i]);
        while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
            i++;
            j = j<<8 | _Py_OPARG(codestr[i]);
        }
        opcode = _Py_OPCODE(codestr[i]);
        switch (opcode) {
            case NOP:continue;

            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                j = blocks[j / sizeof(_Py_CODEUNIT)] * sizeof(_Py_CODEUNIT);
                break;

            case FOR_ITER:
            case JUMP_FORWARD:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                j = blocks[j / sizeof(_Py_CODEUNIT) + i + 1] - blocks[i] - 1;
                j *= sizeof(_Py_CODEUNIT);
                break;
        }
        nexti = i - op_start + 1;
        if (instrsize(j) > nexti)
            goto exitUnchanged;
        /* If instrsize(j) < nexti, we'll emit EXTENDED_ARG 0 */
        write_op_arg(codestr + h, opcode, j, nexti);
        h += nexti;
    }
    assert(h + (Py_ssize_t)nops == codelen);

    CONST_STACK_DELETE();
    PyMem_Free(blocks);
    code = PyBytes_FromStringAndSize((char *)codestr, h * sizeof(_Py_CODEUNIT));
    PyMem_Free(codestr);
    return code;

 exitError:
    code = NULL;

 exitUnchanged:
    /* On error, code is NULL here; otherwise return the input unchanged. */
    Py_XINCREF(code);
    CONST_STACK_DELETE();
    PyMem_Free(blocks);
    PyMem_Free(codestr);
    return code;
}
796