1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
// Compile-time switch for the peephole debug output. It defaults to 0 (off);
// it can be overridden from the build line (e.g. -DPPH_DEBUG=1) without
// triggering a macro redefinition.
#ifndef PPH_DEBUG
#define PPH_DEBUG 0
#endif

// PPH_DUMP(q) executes the statement(s) q only when PPH_DEBUG is non-zero and
// expands to nothing otherwise. The do { } while (0) wrapper turns the
// expansion into a single statement. q is a single macro argument, so it must
// not contain commas outside of parentheses.
#if PPH_DEBUG
#define PPH_DUMP(q) do { q } while (0)
#else
#define PPH_DUMP(q)
#endif
34 
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37 
38 namespace r600_sb {
39 
run()40 int peephole::run() {
41 
42 	run_on(sh.root);
43 
44 	return 0;
45 }
46 
run_on(container_node * c)47 void peephole::run_on(container_node* c) {
48 
49 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
50 		node *n = *I;
51 
52 		if (n->is_container())
53 			run_on(static_cast<container_node*>(n));
54 		else {
55 			if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) {
56 				fetch_node *f = static_cast<fetch_node*>(n);
57 				bool has_dst = false;
58 
59 				for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) {
60 					value *v = *I;
61 					if (v)
62 						has_dst = true;
63 				}
64 				if (!has_dst)
65 					if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
66 						f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
67 			}
68 			if (n->is_alu_inst()) {
69 				alu_node *a = static_cast<alu_node*>(n);
70 
71 				if (a->bc.op_ptr->flags & AF_LDS) {
72 					if (!a->dst[0]) {
73 						if (a->bc.op >= LDS_OP2_LDS_ADD_RET && a->bc.op <= LDS_OP3_LDS_MSKOR_RET)
74 							a->bc.set_op(a->bc.op - LDS_OP2_LDS_ADD_RET + LDS_OP2_LDS_ADD);
75 						if (a->bc.op == LDS_OP1_LDS_READ_RET)
76 							a->src[0] = sh.get_undef_value();
77 					}
78 				} else if (a->bc.op_ptr->flags &
79 						(AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
80 					optimize_cc_op(a);
81 				} else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
82 
83 					alu_node *s = a;
84 					if (get_bool_flt_to_int_source(s)) {
85 						convert_float_setcc(a, s);
86 					}
87 				}
88 			}
89 		}
90 	}
91 }
92 
optimize_cc_op(alu_node * a)93 void peephole::optimize_cc_op(alu_node* a) {
94 	unsigned aflags = a->bc.op_ptr->flags;
95 
96 	if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
97 		optimize_cc_op2(a);
98 	} else if (aflags & AF_CMOV) {
99 		optimize_CNDcc_op(a);
100 	}
101 }
102 
convert_float_setcc(alu_node * f2i,alu_node * s)103 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
104 	alu_node *ns = sh.clone(s);
105 
106 	ns->dst[0] = f2i->dst[0];
107 	ns->dst[0]->def = ns;
108 	ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
109 	f2i->insert_after(ns);
110 	f2i->remove();
111 }
112 
optimize_cc_op2(alu_node * a)113 void peephole::optimize_cc_op2(alu_node* a) {
114 
115 	unsigned flags = a->bc.op_ptr->flags;
116 	unsigned cc = flags & AF_CC_MASK;
117 
118 	if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
119 		return;
120 
121 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
122 	unsigned dst_type = flags & AF_DST_TYPE_MASK;
123 
124 	int op_kind = (flags & AF_PRED) ? 1 :
125 			(flags & AF_SET) ? 2 :
126 			(flags & AF_KILL) ? 3 : 0;
127 
128 	bool swapped = false;
129 
130 	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
131 		std::swap(a->src[0],a->src[1]);
132 		swapped = true;
133 		// clear modifiers
134 		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
135 		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
136 	}
137 
138 	if (swapped || (a->src[1]->is_const() &&
139 			a->src[1]->literal_value == literal(0))) {
140 
141 		value *s = a->src[0];
142 
143 		bool_op_info bop = {};
144 
145 		PPH_DUMP(
146 			sblog << "cc_op2: ";
147 			dump::dump_op(a);
148 			sblog << "\n";
149 		);
150 
151 		if (!get_bool_op_info(s, bop))
152 			return;
153 
154 		if (cc == AF_CC_E)
155 			bop.invert = !bop.invert;
156 
157 		bool swap_args = false;
158 
159 		cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
160 
161 		if (bop.invert)
162 			cc = invert_setcc_condition(cc, swap_args);
163 
164 		if (bop.int_cvt) {
165 			assert(cmp_type != AF_FLOAT_CMP);
166 			cmp_type = AF_FLOAT_CMP;
167 		}
168 
169 		PPH_DUMP(
170 			sblog << "boi node: ";
171 			dump::dump_op(bop.n);
172 			sblog << " invert: " << bop.invert << "  int_cvt: " << bop.int_cvt;
173 			sblog <<"\n";
174 		);
175 
176 		unsigned newop;
177 
178 		switch(op_kind) {
179 		case 1:
180 			newop = get_predsetcc_op(cc, cmp_type);
181 			break;
182 		case 2:
183 			newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
184 			break;
185 		case 3:
186 			newop = get_killcc_op(cc, cmp_type);
187 			break;
188 		default:
189 			newop = ALU_OP0_NOP;
190 			assert(!"invalid op kind");
191 			break;
192 		}
193 
194 		a->bc.set_op(newop);
195 
196 		if (swap_args) {
197 			a->src[0] = bop.n->src[1];
198 			a->src[1] = bop.n->src[0];
199 			a->bc.src[0] = bop.n->bc.src[1];
200 			a->bc.src[1] = bop.n->bc.src[0];
201 
202 		} else {
203 			a->src[0] = bop.n->src[0];
204 			a->src[1] = bop.n->src[1];
205 			a->bc.src[0] = bop.n->bc.src[0];
206 			a->bc.src[1] = bop.n->bc.src[1];
207 		}
208 	}
209 }
210 
optimize_CNDcc_op(alu_node * a)211 void peephole::optimize_CNDcc_op(alu_node* a) {
212 	unsigned flags = a->bc.op_ptr->flags;
213 	unsigned cc = flags & AF_CC_MASK;
214 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
215 	bool swap = false;
216 
217 	if (cc == AF_CC_E) {
218 		swap = !swap;
219 		cc = AF_CC_NE;
220 	} else if (cc != AF_CC_NE)
221 		return;
222 
223 	value *s = a->src[0];
224 
225 	bool_op_info bop = {};
226 
227 	PPH_DUMP(
228 		sblog << "cndcc: ";
229 		dump::dump_op(a);
230 		sblog << "\n";
231 	);
232 
233 	if (!get_bool_op_info(s, bop))
234 		return;
235 
236 	alu_node *d = bop.n;
237 
238 	if (d->bc.omod)
239 		return;
240 
241 	PPH_DUMP(
242 		sblog << "cndcc def: ";
243 		dump::dump_op(d);
244 		sblog << "\n";
245 	);
246 
247 
248 	unsigned dflags = d->bc.op_ptr->flags;
249 	unsigned dcc = dflags & AF_CC_MASK;
250 	unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
251 	unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
252 	int nds;
253 
254 	// TODO we can handle some of these cases,
255 	// though probably this shouldn't happen
256 	if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
257 		return;
258 
259 	if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
260 		nds = 1;
261 	else if ((d->src[1]->is_const() &&
262 			d->src[1]->literal_value == literal(0)))
263 		nds = 0;
264 	else
265 		return;
266 
267 	// can't propagate ABS modifier to CNDcc because it's OP3
268 	if (d->bc.src[nds].abs)
269 		return;
270 
271 	// TODO we can handle some cases for uint comparison
272 	if (dcmp_type == AF_UINT_CMP)
273 		return;
274 
275 	if (dcc == AF_CC_NE) {
276 		dcc = AF_CC_E;
277 		swap = !swap;
278 	}
279 
280 	if (nds == 1) {
281 		switch (dcc) {
282 		case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
283 		case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
284 		default: break;
285 		}
286 	}
287 
288 	a->src[0] = d->src[nds];
289 	a->bc.src[0] = d->bc.src[nds];
290 
291 	if (swap) {
292 		std::swap(a->src[1], a->src[2]);
293 		std::swap(a->bc.src[1], a->bc.src[2]);
294 	}
295 
296 	a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
297 
298 }
299 
get_bool_flt_to_int_source(alu_node * & a)300 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
301 
302 	if (a->bc.op == ALU_OP1_FLT_TO_INT) {
303 
304 		if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
305 			return false;
306 
307 		value *s = a->src[0];
308 		if (!s || !s->def || !s->def->is_alu_inst())
309 			return false;
310 
311 		alu_node *dn = static_cast<alu_node*>(s->def);
312 
313 		if (dn->is_alu_op(ALU_OP1_TRUNC)) {
314 			s = dn->src[0];
315 			if (!s || !s->def || !s->def->is_alu_inst())
316 				return false;
317 
318 			if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
319 					dn->bc.src[0].rel != 0) {
320 				return false;
321 			}
322 
323 			dn = static_cast<alu_node*>(s->def);
324 
325 		}
326 
327 		if (dn->bc.op_ptr->flags & AF_SET) {
328 			a = dn;
329 			return true;
330 		}
331 	}
332 	return false;
333 }
334 
get_bool_op_info(value * b,bool_op_info & bop)335 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
336 
337 	node *d = b->def;
338 
339 	if (!d || !d->is_alu_inst())
340 		return false;
341 
342 	alu_node *dn = static_cast<alu_node*>(d);
343 
344 	if (dn->bc.op_ptr->flags & AF_SET) {
345 		bop.n = dn;
346 
347 		if (dn->bc.op_ptr->flags & AF_DX10)
348 			bop.int_cvt = true;
349 
350 		return true;
351 	}
352 
353 	if (get_bool_flt_to_int_source(dn)) {
354 		bop.n = dn;
355 		bop.int_cvt = true;
356 		return true;
357 	}
358 
359 	return false;
360 }
361 
362 } // namespace r600_sb
363