1 /* Peephole optimizations for bytecode compiler. */
2
3 #include "Python.h"
4
5 #include "Python-ast.h"
6 #include "node.h"
7 #include "ast.h"
8 #include "code.h"
9 #include "symtable.h"
10 #include "opcode.h"
11 #include "wordcode_helpers.h"
12
/* Predicates classifying an opcode byte, plus jump-target decoding.
   GETJUMPTGT converts a jump instruction at code-unit index i into the
   code-unit index it targets: absolute jumps encode a byte offset from
   the start of the code, relative jumps a byte offset from the next
   instruction (hence the "+ i+1").
   ISBASICBLOCK is true when no jump target lies between start and end,
   i.e. the two indices are in the same basic block (see markblocks). */
#define UNCONDITIONAL_JUMP(op)  (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (get_arg(arr, i) / sizeof(_Py_CODEUNIT) + \
        (ABSOLUTE_JUMP(_Py_OPCODE(arr[i])) ? 0 : i+1))
#define ISBASICBLOCK(blocks, start, end) \
    (blocks[start]==blocks[end])
24
25
/* A manually-managed stack mirroring the constants pushed by the most
   recent run of LOAD_CONST instructions; the folding passes consult it.
   All macros operate on the const_stack / const_stack_top /
   const_stack_size / in_consts locals of PyCode_Optimize and may jump
   to its exitError label on allocation failure. */

#define CONST_STACK_CREATE() do { \
    const_stack_size = 256; \
    const_stack = PyMem_New(PyObject *, const_stack_size); \
    if (!const_stack) { \
        PyErr_NoMemory(); \
        goto exitError; \
    } \
    } while(0)

/* PyMem_Free(NULL) is a no-op, so no guard is needed. */
#define CONST_STACK_DELETE() do { \
    PyMem_Free(const_stack); \
    } while(0)

#define CONST_STACK_LEN() ((unsigned)(const_stack_top + 1))

/* Push the constant loaded by the LOAD_CONST at code-unit index i.
   Borrowed reference: the object stays owned by the consts list. */
#define CONST_STACK_PUSH_OP(i) do { \
    PyObject *_x; \
    assert(_Py_OPCODE(codestr[i]) == LOAD_CONST); \
    assert(PyList_GET_SIZE(consts) > (Py_ssize_t)get_arg(codestr, i)); \
    _x = PyList_GET_ITEM(consts, get_arg(codestr, i)); \
    if (++const_stack_top >= const_stack_size) { \
        /* Grow through a temporary: PyMem_Resize NULLs its pointer \
           argument on failure, which previously leaked the old \
           buffer (CONST_STACK_DELETE would then free only NULL). */ \
        PyObject **_tmp = const_stack; \
        const_stack_size *= 2; \
        PyMem_Resize(_tmp, PyObject *, const_stack_size); \
        if (!_tmp) { \
            PyErr_NoMemory(); \
            goto exitError; \
        } \
        const_stack = _tmp; \
    } \
    const_stack[const_stack_top] = _x; \
    in_consts = 1; \
    } while(0)

#define CONST_STACK_RESET() do { \
    const_stack_top = -1; \
    } while(0)

/* Pointer to the i-th-from-last entry (borrowed references). */
#define CONST_STACK_LASTN(i) \
    &const_stack[CONST_STACK_LEN() - i]

#define CONST_STACK_POP(i) do { \
    assert(CONST_STACK_LEN() >= i); \
    const_stack_top -= i; \
    } while(0)
70
71 /* Scans back N consecutive LOAD_CONST instructions, skipping NOPs,
72 returns index of the Nth last's LOAD_CONST's EXTENDED_ARG prefix.
73 Callers are responsible to check CONST_STACK_LEN beforehand.
74 */
75 static Py_ssize_t
lastn_const_start(const _Py_CODEUNIT * codestr,Py_ssize_t i,Py_ssize_t n)76 lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n)
77 {
78 assert(n > 0);
79 for (;;) {
80 i--;
81 assert(i >= 0);
82 if (_Py_OPCODE(codestr[i]) == LOAD_CONST) {
83 if (!--n) {
84 while (i > 0 && _Py_OPCODE(codestr[i-1]) == EXTENDED_ARG) {
85 i--;
86 }
87 return i;
88 }
89 }
90 else {
91 assert(_Py_OPCODE(codestr[i]) == NOP ||
92 _Py_OPCODE(codestr[i]) == EXTENDED_ARG);
93 }
94 }
95 }
96
97 /* Scans through EXTENDED ARGs, seeking the index of the effective opcode */
98 static Py_ssize_t
find_op(const _Py_CODEUNIT * codestr,Py_ssize_t i)99 find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i)
100 {
101 while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
102 i++;
103 }
104 return i;
105 }
106
107 /* Given the index of the effective opcode,
108 scan back to construct the oparg with EXTENDED_ARG */
109 static unsigned int
get_arg(const _Py_CODEUNIT * codestr,Py_ssize_t i)110 get_arg(const _Py_CODEUNIT *codestr, Py_ssize_t i)
111 {
112 _Py_CODEUNIT word;
113 unsigned int oparg = _Py_OPARG(codestr[i]);
114 if (i >= 1 && _Py_OPCODE(word = codestr[i-1]) == EXTENDED_ARG) {
115 oparg |= _Py_OPARG(word) << 8;
116 if (i >= 2 && _Py_OPCODE(word = codestr[i-2]) == EXTENDED_ARG) {
117 oparg |= _Py_OPARG(word) << 16;
118 if (i >= 3 && _Py_OPCODE(word = codestr[i-3]) == EXTENDED_ARG) {
119 oparg |= _Py_OPARG(word) << 24;
120 }
121 }
122 }
123 return oparg;
124 }
125
126 /* Fill the region with NOPs. */
127 static void
fill_nops(_Py_CODEUNIT * codestr,Py_ssize_t start,Py_ssize_t end)128 fill_nops(_Py_CODEUNIT *codestr, Py_ssize_t start, Py_ssize_t end)
129 {
130 memset(codestr + start, NOP, (end - start) * sizeof(_Py_CODEUNIT));
131 }
132
133 /* Given the index of the effective opcode,
134 attempt to replace the argument, taking into account EXTENDED_ARG.
135 Returns -1 on failure, or the new op index on success */
136 static Py_ssize_t
set_arg(_Py_CODEUNIT * codestr,Py_ssize_t i,unsigned int oparg)137 set_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned int oparg)
138 {
139 unsigned int curarg = get_arg(codestr, i);
140 int curilen, newilen;
141 if (curarg == oparg)
142 return i;
143 curilen = instrsize(curarg);
144 newilen = instrsize(oparg);
145 if (curilen < newilen) {
146 return -1;
147 }
148
149 write_op_arg(codestr + i + 1 - curilen, _Py_OPCODE(codestr[i]), oparg, newilen);
150 fill_nops(codestr, i + 1 - curilen + newilen, i + 1);
151 return i-curilen+newilen;
152 }
153
154 /* Attempt to write op/arg at end of specified region of memory.
155 Preceding memory in the region is overwritten with NOPs.
156 Returns -1 on failure, op index on success */
157 static Py_ssize_t
copy_op_arg(_Py_CODEUNIT * codestr,Py_ssize_t i,unsigned char op,unsigned int oparg,Py_ssize_t maxi)158 copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,
159 unsigned int oparg, Py_ssize_t maxi)
160 {
161 int ilen = instrsize(oparg);
162 if (i + ilen > maxi) {
163 return -1;
164 }
165 write_op_arg(codestr + maxi - ilen, op, oparg, ilen);
166 fill_nops(codestr, i, maxi - ilen);
167 return maxi - 1;
168 }
169
170 /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
171 with LOAD_CONST (c1, c2, ... cn).
172 The consts table must still be in list form so that the
173 new constant (c1, c2, ... cn) can be appended.
174 Called with codestr pointing to the first LOAD_CONST.
175 Bails out with no change if one or more of the LOAD_CONSTs is missing.
176 Also works for BUILD_LIST and BUILT_SET when followed by an "in" or "not in"
177 test; for BUILD_SET it assembles a frozenset rather than a tuple.
178 */
179 static Py_ssize_t
fold_tuple_on_constants(_Py_CODEUNIT * codestr,Py_ssize_t c_start,Py_ssize_t opcode_end,unsigned char opcode,PyObject * consts,PyObject ** objs,int n)180 fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
181 Py_ssize_t opcode_end, unsigned char opcode,
182 PyObject *consts, PyObject **objs, int n)
183 {
184 PyObject *newconst, *constant;
185 Py_ssize_t i, len_consts;
186
187 /* Pre-conditions */
188 assert(PyList_CheckExact(consts));
189
190 /* Buildup new tuple of constants */
191 newconst = PyTuple_New(n);
192 if (newconst == NULL) {
193 return -1;
194 }
195 for (i=0 ; i<n ; i++) {
196 constant = objs[i];
197 Py_INCREF(constant);
198 PyTuple_SET_ITEM(newconst, i, constant);
199 }
200
201 /* If it's a BUILD_SET, use the PyTuple we just built to create a
202 PyFrozenSet, and use that as the constant instead: */
203 if (opcode == BUILD_SET) {
204 Py_SETREF(newconst, PyFrozenSet_New(newconst));
205 if (newconst == NULL) {
206 return -1;
207 }
208 }
209
210 /* Append folded constant onto consts */
211 len_consts = PyList_GET_SIZE(consts);
212 if (PyList_Append(consts, newconst)) {
213 Py_DECREF(newconst);
214 return -1;
215 }
216 Py_DECREF(newconst);
217
218 return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
219 }
220
221 /* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
222 with LOAD_CONST binop(c1,c2)
223 The consts table must still be in list form so that the
224 new constant can be appended.
225 Called with codestr pointing to the BINOP.
226 Abandons the transformation if the folding fails (i.e. 1+'a').
227 If the new constant is a sequence, only folds when the size
228 is below a threshold value. That keeps pyc files from
229 becoming large in the presence of code like: (None,)*1000.
230 */
231 static Py_ssize_t
fold_binops_on_constants(_Py_CODEUNIT * codestr,Py_ssize_t c_start,Py_ssize_t opcode_end,unsigned char opcode,PyObject * consts,PyObject ** objs)232 fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
233 Py_ssize_t opcode_end, unsigned char opcode,
234 PyObject *consts, PyObject **objs)
235 {
236 PyObject *newconst, *v, *w;
237 Py_ssize_t len_consts, size;
238
239 /* Pre-conditions */
240 assert(PyList_CheckExact(consts));
241 len_consts = PyList_GET_SIZE(consts);
242
243 /* Create new constant */
244 v = objs[0];
245 w = objs[1];
246 switch (opcode) {
247 case BINARY_POWER:
248 newconst = PyNumber_Power(v, w, Py_None);
249 break;
250 case BINARY_MULTIPLY:
251 newconst = PyNumber_Multiply(v, w);
252 break;
253 case BINARY_TRUE_DIVIDE:
254 newconst = PyNumber_TrueDivide(v, w);
255 break;
256 case BINARY_FLOOR_DIVIDE:
257 newconst = PyNumber_FloorDivide(v, w);
258 break;
259 case BINARY_MODULO:
260 newconst = PyNumber_Remainder(v, w);
261 break;
262 case BINARY_ADD:
263 newconst = PyNumber_Add(v, w);
264 break;
265 case BINARY_SUBTRACT:
266 newconst = PyNumber_Subtract(v, w);
267 break;
268 case BINARY_SUBSCR:
269 newconst = PyObject_GetItem(v, w);
270 break;
271 case BINARY_LSHIFT:
272 newconst = PyNumber_Lshift(v, w);
273 break;
274 case BINARY_RSHIFT:
275 newconst = PyNumber_Rshift(v, w);
276 break;
277 case BINARY_AND:
278 newconst = PyNumber_And(v, w);
279 break;
280 case BINARY_XOR:
281 newconst = PyNumber_Xor(v, w);
282 break;
283 case BINARY_OR:
284 newconst = PyNumber_Or(v, w);
285 break;
286 default:
287 /* Called with an unknown opcode */
288 PyErr_Format(PyExc_SystemError,
289 "unexpected binary operation %d on a constant",
290 opcode);
291 return -1;
292 }
293 if (newconst == NULL) {
294 if(!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
295 PyErr_Clear();
296 }
297 return -1;
298 }
299 size = PyObject_Size(newconst);
300 if (size == -1) {
301 if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
302 return -1;
303 }
304 PyErr_Clear();
305 } else if (size > 20) {
306 Py_DECREF(newconst);
307 return -1;
308 }
309
310 /* Append folded constant into consts table */
311 if (PyList_Append(consts, newconst)) {
312 Py_DECREF(newconst);
313 return -1;
314 }
315 Py_DECREF(newconst);
316
317 return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
318 }
319
320 static Py_ssize_t
fold_unaryops_on_constants(_Py_CODEUNIT * codestr,Py_ssize_t c_start,Py_ssize_t opcode_end,unsigned char opcode,PyObject * consts,PyObject * v)321 fold_unaryops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
322 Py_ssize_t opcode_end, unsigned char opcode,
323 PyObject *consts, PyObject *v)
324 {
325 PyObject *newconst;
326 Py_ssize_t len_consts;
327
328 /* Pre-conditions */
329 assert(PyList_CheckExact(consts));
330 len_consts = PyList_GET_SIZE(consts);
331
332 /* Create new constant */
333 switch (opcode) {
334 case UNARY_NEGATIVE:
335 newconst = PyNumber_Negative(v);
336 break;
337 case UNARY_INVERT:
338 newconst = PyNumber_Invert(v);
339 break;
340 case UNARY_POSITIVE:
341 newconst = PyNumber_Positive(v);
342 break;
343 default:
344 /* Called with an unknown opcode */
345 PyErr_Format(PyExc_SystemError,
346 "unexpected unary operation %d on a constant",
347 opcode);
348 return -1;
349 }
350 if (newconst == NULL) {
351 if(!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
352 PyErr_Clear();
353 }
354 return -1;
355 }
356
357 /* Append folded constant into consts table */
358 if (PyList_Append(consts, newconst)) {
359 Py_DECREF(newconst);
360 PyErr_Clear();
361 return -1;
362 }
363 Py_DECREF(newconst);
364
365 return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
366 }
367
368 static unsigned int *
markblocks(_Py_CODEUNIT * code,Py_ssize_t len)369 markblocks(_Py_CODEUNIT *code, Py_ssize_t len)
370 {
371 unsigned int *blocks = PyMem_New(unsigned int, len);
372 int i, j, opcode, blockcnt = 0;
373
374 if (blocks == NULL) {
375 PyErr_NoMemory();
376 return NULL;
377 }
378 memset(blocks, 0, len*sizeof(int));
379
380 /* Mark labels in the first pass */
381 for (i = 0; i < len; i++) {
382 opcode = _Py_OPCODE(code[i]);
383 switch (opcode) {
384 case FOR_ITER:
385 case JUMP_FORWARD:
386 case JUMP_IF_FALSE_OR_POP:
387 case JUMP_IF_TRUE_OR_POP:
388 case POP_JUMP_IF_FALSE:
389 case POP_JUMP_IF_TRUE:
390 case JUMP_ABSOLUTE:
391 case CONTINUE_LOOP:
392 case SETUP_LOOP:
393 case SETUP_EXCEPT:
394 case SETUP_FINALLY:
395 case SETUP_WITH:
396 case SETUP_ASYNC_WITH:
397 j = GETJUMPTGT(code, i);
398 assert(j < len);
399 blocks[j] = 1;
400 break;
401 }
402 }
403 /* Build block numbers in the second pass */
404 for (i = 0; i < len; i++) {
405 blockcnt += blocks[i]; /* increment blockcnt over labels */
406 blocks[i] = blockcnt;
407 }
408 return blocks;
409 }
410
/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails when the lineno table has complex
   encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass.

   Returns a new reference to the optimized bytecode object, a new
   reference to the unchanged `code` when optimization is bypassed, or
   NULL with an exception set on error.  `lnotab_obj` is mutated in place
   (its refcount is asserted to be 1).  `names` is currently unused. */

PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lnotab_obj)
{
    Py_ssize_t h, i, nexti, op_start, codelen, tgt;
    unsigned int j, nops;
    unsigned char opcode, nextop;
    _Py_CODEUNIT *codestr = NULL;       /* mutable copy of the bytecode */
    unsigned char *lnotab;
    unsigned int cum_orig_offset, last_offset;
    Py_ssize_t tabsiz;
    /* Borrowed references mirroring the current LOAD_CONST run. */
    PyObject **const_stack = NULL;
    Py_ssize_t const_stack_top = -1;
    Py_ssize_t const_stack_size = 0;
    int in_consts = 0;  /* whether we are in a LOAD_CONST sequence */
    unsigned int *blocks = NULL;        /* basic-block map, see markblocks */

    /* Bail out if an exception is set */
    if (PyErr_Occurred())
        goto exitError;

    /* Bypass optimization when the lnotab table is too complex */
    assert(PyBytes_Check(lnotab_obj));
    lnotab = (unsigned char*)PyBytes_AS_STRING(lnotab_obj);
    tabsiz = PyBytes_GET_SIZE(lnotab_obj);
    assert(tabsiz == 0 || Py_REFCNT(lnotab_obj) == 1);
    if (memchr(lnotab, 255, tabsiz) != NULL) {
        /* 255 value are used for multibyte bytecode instructions */
        goto exitUnchanged;
    }
    /* Note: -128 and 127 special values for line number delta are ok,
       the peephole optimizer doesn't modify line numbers. */

    assert(PyBytes_Check(code));
    codelen = PyBytes_GET_SIZE(code);
    assert(codelen % sizeof(_Py_CODEUNIT) == 0);

    /* Make a modifiable copy of the code string */
    codestr = (_Py_CODEUNIT *)PyMem_Malloc(codelen);
    if (codestr == NULL) {
        PyErr_NoMemory();
        goto exitError;
    }
    memcpy(codestr, PyBytes_AS_STRING(code), codelen);
    /* From here on, codelen and all indices are in 16-bit code units,
       not bytes. */
    codelen /= sizeof(_Py_CODEUNIT);

    blocks = markblocks(codestr, codelen);
    if (blocks == NULL)
        goto exitError;
    assert(PyList_Check(consts));

    CONST_STACK_CREATE();

    /* Main pass: i indexes the current effective opcode, op_start its
       first EXTENDED_ARG prefix, nexti the next effective opcode. */
    for (i=find_op(codestr, 0) ; i<codelen ; i=nexti) {
        opcode = _Py_OPCODE(codestr[i]);
        op_start = i;
        while (op_start >= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) {
            op_start--;
        }

        nexti = i + 1;
        while (nexti < codelen && _Py_OPCODE(codestr[nexti]) == EXTENDED_ARG)
            nexti++;
        /* nextop = 0 (STOP_CODE) at end of code */
        nextop = nexti < codelen ? _Py_OPCODE(codestr[nexti]) : 0;

        /* Any opcode that is not LOAD_CONST breaks the constant run. */
        if (!in_consts) {
            CONST_STACK_RESET();
        }
        in_consts = 0;

        switch (opcode) {
            /* Replace UNARY_NOT POP_JUMP_IF_FALSE
                   with    POP_JUMP_IF_TRUE */
            case UNARY_NOT:
                if (nextop != POP_JUMP_IF_FALSE
                    || !ISBASICBLOCK(blocks, op_start, i + 1))
                    break;
                fill_nops(codestr, op_start, i + 1);
                codestr[nexti] = PACKOPARG(POP_JUMP_IF_TRUE, _Py_OPARG(codestr[nexti]));
                break;

            /* not a is b -->  a is not b
               not a in b -->  a not in b
               not a is not b -->  a is b
               not a not in b -->  a in b
            */
            case COMPARE_OP:
                /* Oparg values 6..9 are PyCmp_IN/NOT_IN/IS/IS_NOT;
                   XOR with 1 flips each pair. */
                j = get_arg(codestr, i);
                if (j < 6 || j > 9 ||
                    nextop != UNARY_NOT ||
                    !ISBASICBLOCK(blocks, op_start, i + 1))
                    break;
                codestr[i] = PACKOPARG(opcode, j^1);
                fill_nops(codestr, i + 1, nexti + 1);
                break;

            /* Skip over LOAD_CONST trueconst
               POP_JUMP_IF_FALSE xx.  This improves
               "while 1" performance. */
            case LOAD_CONST:
                CONST_STACK_PUSH_OP(i);
                if (nextop != POP_JUMP_IF_FALSE ||
                    !ISBASICBLOCK(blocks, op_start, i + 1) ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i))))
                    break;
                fill_nops(codestr, op_start, nexti + 1);
                CONST_STACK_POP(1);
                break;

            /* Try to fold tuples of constants (includes a case for lists
               and sets which are only used for "in" and "not in" tests).
               Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
               Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
            case BUILD_TUPLE:
            case BUILD_LIST:
            case BUILD_SET:
                j = get_arg(codestr, i);
                if (j > 0 && CONST_STACK_LEN() >= j) {
                    h = lastn_const_start(codestr, op_start, j);
                    if ((opcode == BUILD_TUPLE &&
                          ISBASICBLOCK(blocks, h, op_start)) ||
                         ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                          ((nextop==COMPARE_OP &&
                           (_Py_OPARG(codestr[nexti]) == PyCmp_IN ||
                            _Py_OPARG(codestr[nexti]) == PyCmp_NOT_IN)) ||
                           nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 1))) {
                        h = fold_tuple_on_constants(codestr, h, i + 1, opcode,
                                                    consts, CONST_STACK_LASTN(j), j);
                        if (h >= 0) {
                            CONST_STACK_POP(j);
                            CONST_STACK_PUSH_OP(h);
                        }
                        break;
                    }
                }
                if (nextop != UNPACK_SEQUENCE ||
                    !ISBASICBLOCK(blocks, op_start, i + 1) ||
                    j != get_arg(codestr, nexti) ||
                    opcode == BUILD_SET)
                    break;
                if (j < 2) {
                    /* BUILD_SEQN 1 UNPACK_SEQN 1 is a no-op. */
                    fill_nops(codestr, op_start, nexti + 1);
                } else if (j == 2) {
                    codestr[op_start] = PACKOPARG(ROT_TWO, 0);
                    fill_nops(codestr, op_start + 1, nexti + 1);
                    CONST_STACK_RESET();
                } else if (j == 3) {
                    codestr[op_start] = PACKOPARG(ROT_THREE, 0);
                    codestr[op_start + 1] = PACKOPARG(ROT_TWO, 0);
                    fill_nops(codestr, op_start + 2, nexti + 1);
                    CONST_STACK_RESET();
                }
                break;

            /* Fold binary ops on constants.
               LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
            case BINARY_POWER:
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_MODULO:
            case BINARY_ADD:
            case BINARY_SUBTRACT:
            case BINARY_SUBSCR:
            case BINARY_LSHIFT:
            case BINARY_RSHIFT:
            case BINARY_AND:
            case BINARY_XOR:
            case BINARY_OR:
                if (CONST_STACK_LEN() < 2)
                    break;
                h = lastn_const_start(codestr, op_start, 2);
                if (ISBASICBLOCK(blocks, h, op_start)) {
                    h = fold_binops_on_constants(codestr, h, i + 1, opcode,
                                                 consts, CONST_STACK_LASTN(2));
                    if (h >= 0) {
                        CONST_STACK_POP(2);
                        CONST_STACK_PUSH_OP(h);
                    }
                }
                break;

            /* Fold unary ops on constants.
               LOAD_CONST c1  UNARY_OP --> LOAD_CONST unary_op(c) */
            case UNARY_NEGATIVE:
            case UNARY_INVERT:
            case UNARY_POSITIVE:
                if (CONST_STACK_LEN() < 1)
                    break;
                h = lastn_const_start(codestr, op_start, 1);
                if (ISBASICBLOCK(blocks, h, op_start)) {
                    h = fold_unaryops_on_constants(codestr, h, i + 1, opcode,
                                                   consts, *CONST_STACK_LASTN(1));
                    if (h >= 0) {
                        CONST_STACK_POP(1);
                        CONST_STACK_PUSH_OP(h);
                    }
                }
                break;

            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.
               Arises in code like:
               "if a and b:"
               "if a or b:"
               "a and b or c"
               "(a and b) and c"
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                  -->  x:JUMP_IF_FALSE_OR_POP z
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                  -->  x:POP_JUMP_IF_FALSE y+1
               where y+1 is the instruction following the second test.
            */
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                /* Jump args are byte offsets; convert to a code-unit
                   index before chasing the target. */
                h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT);
                tgt = find_op(codestr, h);

                j = _Py_OPCODE(codestr[tgt]);
                if (CONDITIONAL_JUMP(j)) {
                    /* NOTE: all possible jumps here are absolute. */
                    if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
                        /* The second jump will be taken iff the first is.
                           The current opcode inherits its target's
                           stack effect */
                        h = set_arg(codestr, i, get_arg(codestr, tgt));
                    } else {
                        /* The second jump is not taken if the first is (so
                           jump past it), and all conditional jumps pop their
                           argument when they're not taken (so change the
                           first jump to pop its argument when it's taken). */
                        h = set_arg(codestr, i, (tgt + 1) * sizeof(_Py_CODEUNIT));
                        j = opcode == JUMP_IF_TRUE_OR_POP ?
                            POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE;
                    }

                    if (h >= 0) {
                        nexti = h;
                        codestr[nexti] = PACKOPARG(j, _Py_OPARG(codestr[nexti]));
                        break;
                    }
                }
                /* Intentional fallthrough */

            /* Replace jumps to unconditional jumps */
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                h = GETJUMPTGT(codestr, i);
                tgt = find_op(codestr, h);
                /* Replace JUMP_* to a RETURN into just a RETURN */
                if (UNCONDITIONAL_JUMP(opcode) &&
                    _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) {
                    codestr[op_start] = PACKOPARG(RETURN_VALUE, 0);
                    fill_nops(codestr, op_start + 1, i + 1);
                } else if (UNCONDITIONAL_JUMP(_Py_OPCODE(codestr[tgt]))) {
                    /* "Jump threading": retarget directly at the final
                       destination of a jump-to-jump chain. */
                    j = GETJUMPTGT(codestr, tgt);
                    if (opcode == JUMP_FORWARD) { /* JMP_ABS can go backwards */
                        opcode = JUMP_ABSOLUTE;
                    } else if (!ABSOLUTE_JUMP(opcode)) {
                        if ((Py_ssize_t)j < i + 1) {
                            break;           /* No backward relative jumps */
                        }
                        j -= i + 1;          /* Calc relative jump addr */
                    }
                    /* Back from code-unit index to byte offset. */
                    j *= sizeof(_Py_CODEUNIT);
                    copy_op_arg(codestr, op_start, opcode, j, i + 1);
                }
                break;

            /* Remove unreachable ops after RETURN */
            case RETURN_VALUE:
                h = i + 1;
                /* Everything until the next basic block (jump target)
                   can never execute. */
                while (h < codelen && ISBASICBLOCK(blocks, i, h)) {
                    h++;
                }
                if (h > i + 1) {
                    fill_nops(codestr, i + 1, h);
                    nexti = find_op(codestr, h);
                }
                break;
        }
    }

    /* Fixup lnotab: blocks[] is reused as an old-offset -> new-offset
       map (in code units), counting the NOPs about to be removed. */
    for (i = 0, nops = 0; i < codelen; i++) {
        assert(i - nops <= INT_MAX);
        /* original code offset => new code offset */
        blocks[i] = i - nops;
        if (_Py_OPCODE(codestr[i]) == NOP)
            nops++;
    }
    cum_orig_offset = 0;
    last_offset = 0;
    for (i=0 ; i < tabsiz ; i+=2) {
        unsigned int offset_delta, new_offset;
        cum_orig_offset += lnotab[i];
        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0);
        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] *
            sizeof(_Py_CODEUNIT);
        offset_delta = new_offset - last_offset;
        assert(offset_delta <= 255);
        lnotab[i] = (unsigned char)offset_delta;
        last_offset = new_offset;
    }

    /* Remove NOPs and fixup jump targets: h is the write index while i
       reads; each instruction is re-encoded with its adjusted arg. */
    for (op_start = i = h = 0; i < codelen; i++, op_start = i) {
        j = _Py_OPARG(codestr[i]);
        while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
            i++;
            j = j<<8 | _Py_OPARG(codestr[i]);
        }
        opcode = _Py_OPCODE(codestr[i]);
        switch (opcode) {
            case NOP: continue;

            /* Absolute jumps: remap the target through blocks[]. */
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                j = blocks[j / sizeof(_Py_CODEUNIT)] * sizeof(_Py_CODEUNIT);
                break;

            /* Relative jumps: remap both ends, re-derive the delta. */
            case FOR_ITER:
            case JUMP_FORWARD:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                j = blocks[j / sizeof(_Py_CODEUNIT) + i + 1] - blocks[i] - 1;
                j *= sizeof(_Py_CODEUNIT);
                break;
        }
        /* nexti = width (in code units) the original encoding used. */
        nexti = i - op_start + 1;
        if (instrsize(j) > nexti)
            goto exitUnchanged;
        /* If instrsize(j) < nexti, we'll emit EXTENDED_ARG 0 */
        write_op_arg(codestr + h, opcode, j, nexti);
        h += nexti;
    }
    assert(h + (Py_ssize_t)nops == codelen);

    CONST_STACK_DELETE();
    PyMem_Free(blocks);
    code = PyBytes_FromStringAndSize((char *)codestr, h * sizeof(_Py_CODEUNIT));
    PyMem_Free(codestr);
    return code;

 exitError:
    code = NULL;

 exitUnchanged:
    /* Return the input bytecode untouched (or NULL via exitError). */
    Py_XINCREF(code);
    CONST_STACK_DELETE();
    PyMem_Free(blocks);
    PyMem_Free(codestr);
    return code;
}
796