1 /* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_fs.h" 25 #include "brw_fs_live_variables.h" 26 #include "brw_cfg.h" 27 28 /** @file brw_fs_saturate_propagation.cpp 29 * 30 * Implements a pass that propagates the SAT modifier from a MOV.SAT into the 31 * instruction that produced the source of the MOV.SAT, thereby allowing the 32 * MOV's src and dst to be coalesced and the MOV removed. 33 * 34 * For instance, 35 * 36 * ADD tmp, src0, src1 37 * MOV.SAT dst, tmp 38 * 39 * would be transformed into 40 * 41 * ADD.SAT tmp, src0, src1 42 * MOV dst, tmp 43 */ 44 45 static bool 46 opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) 47 { 48 bool progress = false; 49 int ip = block->end_ip + 1; 50 51 foreach_inst_in_block_reverse(fs_inst, inst, block) { 52 ip--; 53 54 if (inst->opcode != BRW_OPCODE_MOV || 55 !inst->saturate || 56 inst->dst.file != VGRF || 57 inst->dst.type != inst->src[0].type || 58 inst->src[0].file != VGRF || 59 inst->src[0].abs) 60 continue; 61 62 int src_var = v->live_intervals->var_from_reg(inst->src[0]); 63 int src_end_ip = v->live_intervals->end[src_var]; 64 65 bool interfered = false; 66 foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { 67 if (regions_overlap(scan_inst->dst, scan_inst->size_written, 68 inst->src[0], inst->size_read(0))) { 69 if (scan_inst->is_partial_write() || 70 (scan_inst->dst.type != inst->dst.type && 71 !scan_inst->can_change_types())) 72 break; 73 74 if (scan_inst->saturate) { 75 inst->saturate = false; 76 progress = true; 77 } else if (src_end_ip == ip || inst->dst.equals(inst->src[0])) { 78 if (scan_inst->can_do_saturate()) { 79 if (scan_inst->dst.type != inst->dst.type) { 80 scan_inst->dst.type = inst->dst.type; 81 for (int i = 0; i < scan_inst->sources; i++) { 82 scan_inst->src[i].type = inst->dst.type; 83 } 84 } 85 86 if (inst->src[0].negate) { 87 if (scan_inst->opcode == BRW_OPCODE_MUL) { 88 scan_inst->src[0].negate = !scan_inst->src[0].negate; 89 inst->src[0].negate = false; 90 } else if (scan_inst->opcode == BRW_OPCODE_MAD) { 91 scan_inst->src[0].negate = !scan_inst->src[0].negate; 92 scan_inst->src[1].negate = !scan_inst->src[1].negate; 93 inst->src[0].negate = false; 94 } else if (scan_inst->opcode == BRW_OPCODE_ADD) { 95 if (scan_inst->src[1].file == IMM) { 96 if (!brw_negate_immediate(scan_inst->src[1].type, 97 &scan_inst->src[1].as_brw_reg())) { 98 break; 99 } 100 } else { 101 scan_inst->src[1].negate = !scan_inst->src[1].negate; 102 } 103 scan_inst->src[0].negate = !scan_inst->src[0].negate; 104 inst->src[0].negate = false; 105 } else { 106 break; 107 } 108 } 109 110 scan_inst->saturate = true; 111 inst->saturate = false; 112 progress = true; 113 } 114 } 115 break; 116 } 117 for (int i = 0; i < scan_inst->sources; i++) { 118 if (scan_inst->src[i].file == VGRF && 119 scan_inst->src[i].nr == inst->src[0].nr && 120 scan_inst->src[i].offset / REG_SIZE == 121 inst->src[0].offset / REG_SIZE) { 122 if (scan_inst->opcode != BRW_OPCODE_MOV || 123 !scan_inst->saturate || 124 scan_inst->src[0].abs || 125 scan_inst->src[0].negate || 126 scan_inst->src[0].abs != inst->src[0].abs || 127 scan_inst->src[0].negate != inst->src[0].negate) { 128 interfered = true; 129 break; 130 } 131 } 132 } 133 134 if (interfered) 135 break; 136 } 137 } 138 139 return progress; 140 } 141 142 bool 143 fs_visitor::opt_saturate_propagation() 144 { 145 bool progress = false; 146 147 calculate_live_intervals(); 148 149 foreach_block (block, cfg) { 150 progress = opt_saturate_propagation_local(this, block) || progress; 151 } 152 153 /* Live intervals are still valid. */ 154 155 return progress; 156 } 157