1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "radeon_dataflow.h"
30 
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
36 
37 struct src_clobbered_reads_cb_data {
38 	rc_register_file File;
39 	unsigned int Index;
40 	unsigned int Mask;
41 	struct rc_reader_data * ReaderData;
42 };
43 
/**
 * Callback used by presub_helper() to rewrite one source of a reader.
 * It receives the ADD instruction being folded, the instruction that
 * reads the ADD's result, and the index of the reader's source register
 * that refers to that result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *inst_add,
				     struct rc_instruction *inst_reader,
				     unsigned int src_index);
47 
chain_srcregs(struct rc_src_register outer,struct rc_src_register inner)48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49 {
50 	struct rc_src_register combine;
51 	combine.File = inner.File;
52 	combine.Index = inner.Index;
53 	combine.RelAddr = inner.RelAddr;
54 	if (outer.Abs) {
55 		combine.Abs = 1;
56 		combine.Negate = outer.Negate;
57 	} else {
58 		combine.Abs = inner.Abs;
59 		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60 		combine.Negate ^= outer.Negate;
61 	}
62 	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63 	return combine;
64 }
65 
copy_propagate_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67 						struct rc_src_register * src)
68 {
69 	rc_register_file file = src->File;
70 	struct rc_reader_data * reader_data = data;
71 
72 	if(!rc_inst_can_use_presub(inst,
73 				reader_data->Writer->U.I.PreSub.Opcode,
74 				rc_swizzle_to_writemask(src->Swizzle),
75 				src,
76 				&reader_data->Writer->U.I.PreSub.SrcReg[0],
77 				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78 		reader_data->Abort = 1;
79 		return;
80 	}
81 
82 	/* XXX This could probably be handled better. */
83 	if (file == RC_FILE_ADDRESS) {
84 		reader_data->Abort = 1;
85 		return;
86 	}
87 
88 	/* These instructions cannot read from the constants file.
89 	 * see radeonTransformTEX()
90 	 */
91 	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92 			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
94 				inst->U.I.Opcode == RC_OPCODE_TXB ||
95 				inst->U.I.Opcode == RC_OPCODE_TXP ||
96 				inst->U.I.Opcode == RC_OPCODE_TXD ||
97 				inst->U.I.Opcode == RC_OPCODE_TXL ||
98 				inst->U.I.Opcode == RC_OPCODE_KIL)){
99 		reader_data->Abort = 1;
100 		return;
101 	}
102 }
103 
src_clobbered_reads_cb(void * data,struct rc_instruction * inst,struct rc_src_register * src)104 static void src_clobbered_reads_cb(
105 	void * data,
106 	struct rc_instruction * inst,
107 	struct rc_src_register * src)
108 {
109 	struct src_clobbered_reads_cb_data * sc_data = data;
110 
111 	if (src->File == sc_data->File
112 	    && src->Index == sc_data->Index
113 	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114 
115 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 	}
117 
118 	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120 	}
121 }
122 
/**
 * Write callback for rc_get_readers(): for every register write seen
 * during the scan, check each source of the tracked writer instruction
 * against that write via src_clobbered_reads_cb().
 *
 * @param data   the struct rc_reader_data of the scan
 * @param inst   instruction performing the write (not used here)
 * @param file   register file being written
 * @param index  register index being written
 * @param mask   channel mask being written
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * reader_data = data;
	struct src_clobbered_reads_cb_data write_info = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = reader_data,
	};

	rc_for_all_reads_src(reader_data->Writer,
			     src_clobbered_reads_cb, &write_info);
}
139 
copy_propagate(struct radeon_compiler * c,struct rc_instruction * inst_mov)140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141 {
142 	struct rc_reader_data reader_data;
143 	unsigned int i;
144 
145 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146 	    inst_mov->U.I.WriteALUResult ||
147 	    inst_mov->U.I.SaturateMode)
148 		return;
149 
150 	/* Get a list of all the readers of this MOV instruction. */
151 	reader_data.ExitOnAbort = 1;
152 	rc_get_readers(c, inst_mov, &reader_data,
153 		       copy_propagate_scan_read, NULL,
154 		       is_src_clobbered_scan_write);
155 
156 	if (reader_data.Abort || reader_data.ReaderCount == 0)
157 		return;
158 
159 	/* Propagate the MOV instruction. */
160 	for (i = 0; i < reader_data.ReaderCount; i++) {
161 		struct rc_instruction * inst = reader_data.Readers[i].Inst;
162 		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
163 
164 		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
165 			inst->U.I.PreSub = inst_mov->U.I.PreSub;
166 	}
167 
168 	/* Finally, remove the original MOV instruction */
169 	rc_remove_instruction(inst_mov);
170 }
171 
172 /**
173  * Check if a source register is actually always the same
174  * swizzle constant.
175  */
is_src_uniform_constant(struct rc_src_register src,rc_swizzle * pswz,unsigned int * pnegate)176 static int is_src_uniform_constant(struct rc_src_register src,
177 		rc_swizzle * pswz, unsigned int * pnegate)
178 {
179 	int have_used = 0;
180 
181 	if (src.File != RC_FILE_NONE) {
182 		*pswz = 0;
183 		return 0;
184 	}
185 
186 	for(unsigned int chan = 0; chan < 4; ++chan) {
187 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
188 		if (swz < 4) {
189 			*pswz = 0;
190 			return 0;
191 		}
192 		if (swz == RC_SWIZZLE_UNUSED)
193 			continue;
194 
195 		if (!have_used) {
196 			*pswz = swz;
197 			*pnegate = GET_BIT(src.Negate, chan);
198 			have_used = 1;
199 		} else {
200 			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
201 				*pswz = 0;
202 				return 0;
203 			}
204 		}
205 	}
206 
207 	return 1;
208 }
209 
constant_folding_mad(struct rc_instruction * inst)210 static void constant_folding_mad(struct rc_instruction * inst)
211 {
212 	rc_swizzle swz = 0;
213 	unsigned int negate= 0;
214 
215 	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
216 		if (swz == RC_SWIZZLE_ZERO) {
217 			inst->U.I.Opcode = RC_OPCODE_MUL;
218 			return;
219 		}
220 	}
221 
222 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
223 		if (swz == RC_SWIZZLE_ONE) {
224 			inst->U.I.Opcode = RC_OPCODE_ADD;
225 			if (negate)
226 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
227 			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
228 			return;
229 		} else if (swz == RC_SWIZZLE_ZERO) {
230 			inst->U.I.Opcode = RC_OPCODE_MOV;
231 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
232 			return;
233 		}
234 	}
235 
236 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
237 		if (swz == RC_SWIZZLE_ONE) {
238 			inst->U.I.Opcode = RC_OPCODE_ADD;
239 			if (negate)
240 				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
241 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
242 			return;
243 		} else if (swz == RC_SWIZZLE_ZERO) {
244 			inst->U.I.Opcode = RC_OPCODE_MOV;
245 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
246 			return;
247 		}
248 	}
249 }
250 
constant_folding_mul(struct rc_instruction * inst)251 static void constant_folding_mul(struct rc_instruction * inst)
252 {
253 	rc_swizzle swz = 0;
254 	unsigned int negate = 0;
255 
256 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
257 		if (swz == RC_SWIZZLE_ONE) {
258 			inst->U.I.Opcode = RC_OPCODE_MOV;
259 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
260 			if (negate)
261 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
262 			return;
263 		} else if (swz == RC_SWIZZLE_ZERO) {
264 			inst->U.I.Opcode = RC_OPCODE_MOV;
265 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
266 			return;
267 		}
268 	}
269 
270 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
271 		if (swz == RC_SWIZZLE_ONE) {
272 			inst->U.I.Opcode = RC_OPCODE_MOV;
273 			if (negate)
274 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
275 			return;
276 		} else if (swz == RC_SWIZZLE_ZERO) {
277 			inst->U.I.Opcode = RC_OPCODE_MOV;
278 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
279 			return;
280 		}
281 	}
282 }
283 
constant_folding_add(struct rc_instruction * inst)284 static void constant_folding_add(struct rc_instruction * inst)
285 {
286 	rc_swizzle swz = 0;
287 	unsigned int negate = 0;
288 
289 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
290 		if (swz == RC_SWIZZLE_ZERO) {
291 			inst->U.I.Opcode = RC_OPCODE_MOV;
292 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
293 			return;
294 		}
295 	}
296 
297 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
298 		if (swz == RC_SWIZZLE_ZERO) {
299 			inst->U.I.Opcode = RC_OPCODE_MOV;
300 			return;
301 		}
302 	}
303 }
304 
305 /**
306  * Replace 0.0, 1.0 and 0.5 immediate constants by their
307  * respective swizzles. Simplify instructions like ADD dst, src, 0;
308  */
constant_folding(struct radeon_compiler * c,struct rc_instruction * inst)309 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
310 {
311 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
312 	unsigned int i;
313 
314 	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
315 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
316 		struct rc_constant * constant;
317 		struct rc_src_register newsrc;
318 		int have_real_reference;
319 		unsigned int chan;
320 
321 		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
322 		for (chan = 0; chan < 4; ++chan)
323 			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
324 				break;
325 		if (chan == 4) {
326 			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
327 			continue;
328 		}
329 
330 		/* Convert immediates to swizzles. */
331 		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
332 		    inst->U.I.SrcReg[src].RelAddr ||
333 		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
334 			continue;
335 
336 		constant =
337 			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
338 
339 		if (constant->Type != RC_CONSTANT_IMMEDIATE)
340 			continue;
341 
342 		newsrc = inst->U.I.SrcReg[src];
343 		have_real_reference = 0;
344 		for (chan = 0; chan < 4; ++chan) {
345 			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
346 			unsigned int newswz;
347 			float imm;
348 			float baseimm;
349 
350 			if (swz >= 4)
351 				continue;
352 
353 			imm = constant->u.Immediate[swz];
354 			baseimm = imm;
355 			if (imm < 0.0)
356 				baseimm = -baseimm;
357 
358 			if (baseimm == 0.0) {
359 				newswz = RC_SWIZZLE_ZERO;
360 			} else if (baseimm == 1.0) {
361 				newswz = RC_SWIZZLE_ONE;
362 			} else if (baseimm == 0.5 && c->has_half_swizzles) {
363 				newswz = RC_SWIZZLE_HALF;
364 			} else {
365 				have_real_reference = 1;
366 				continue;
367 			}
368 
369 			SET_SWZ(newsrc.Swizzle, chan, newswz);
370 			if (imm < 0.0 && !newsrc.Abs)
371 				newsrc.Negate ^= 1 << chan;
372 		}
373 
374 		if (!have_real_reference) {
375 			newsrc.File = RC_FILE_NONE;
376 			newsrc.Index = 0;
377 		}
378 
379 		/* don't make the swizzle worse */
380 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
381 		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
382 			continue;
383 
384 		inst->U.I.SrcReg[src] = newsrc;
385 	}
386 
387 	/* Simplify instructions based on constants */
388 	if (inst->U.I.Opcode == RC_OPCODE_MAD)
389 		constant_folding_mad(inst);
390 
391 	/* note: MAD can simplify to MUL or ADD */
392 	if (inst->U.I.Opcode == RC_OPCODE_MUL)
393 		constant_folding_mul(inst);
394 	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
395 		constant_folding_add(inst);
396 
397 	/* In case this instruction has been converted, make sure all of the
398 	 * registers that are no longer used are empty. */
399 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
400 	for(i = opcode->NumSrcRegs; i < 3; i++) {
401 		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
402 	}
403 }
404 
405 /**
406  * If src and dst use the same register, this function returns a writemask that
407  * indicates wich components are read by src.  Otherwise zero is returned.
408  */
src_reads_dst_mask(struct rc_src_register src,struct rc_dst_register dst)409 static unsigned int src_reads_dst_mask(struct rc_src_register src,
410 						struct rc_dst_register dst)
411 {
412 	if (dst.File != src.File || dst.Index != src.Index) {
413 		return 0;
414 	}
415 	return rc_swizzle_to_writemask(src.Swizzle);
416 }
417 
418 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
419  * in any of its channels.  Return 0 otherwise. */
src_has_const_swz(struct rc_src_register src)420 static int src_has_const_swz(struct rc_src_register src) {
421 	int chan;
422 	for(chan = 0; chan < 4; chan++) {
423 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
424 		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
425 						|| swz == RC_SWIZZLE_ONE) {
426 			return 1;
427 		}
428 	}
429 	return 0;
430 }
431 
presub_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)432 static void presub_scan_read(
433 	void * data,
434 	struct rc_instruction * inst,
435 	struct rc_src_register * src)
436 {
437 	struct rc_reader_data * reader_data = data;
438 	rc_presubtract_op * presub_opcode = reader_data->CbData;
439 
440 	if (!rc_inst_can_use_presub(inst, *presub_opcode,
441 			reader_data->Writer->U.I.DstReg.WriteMask,
442 			src,
443 			&reader_data->Writer->U.I.SrcReg[0],
444 			&reader_data->Writer->U.I.SrcReg[1])) {
445 		reader_data->Abort = 1;
446 		return;
447 	}
448 }
449 
/**
 * Shared driver for the presubtract peepholes.  Collects all readers
 * of inst_add and, if none of them vetoes the conversion, calls
 * presub_replace on every reader source that refers to inst_add's
 * result.
 *
 * @param c               compiler context
 * @param inst_add        the ADD instruction being folded
 * @param presub_opcode   presubtract operation the readers must accept
 * @param presub_replace  callback that rewrites one reader source
 * @return 1 if the readers were rewritten, 0 if nothing was changed
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data readers;
	rc_presubtract_op wanted_op = presub_opcode;
	unsigned int r;

	/* presub_scan_read() reads the wanted opcode through CbData. */
	readers.CbData = &wanted_op;
	readers.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &readers, presub_scan_read, NULL,
						is_src_clobbered_scan_write);

	if (readers.Abort || readers.ReaderCount == 0)
		return 0;

	for (r = 0; r < readers.ReaderCount; r++) {
		struct rc_instruction * reader_inst = readers.Readers[r].Inst;
		struct rc_src_register * reader_src = readers.Readers[r].U.I.Src;
		const struct rc_opcode_info * info =
				rc_get_opcode_info(reader_inst->U.I.Opcode);
		unsigned int s;

		/* Find which source slot of the reader this entry is. */
		for (s = 0; s < info->NumSrcRegs; s++) {
			if (reader_src == &reader_inst->U.I.SrcReg[s])
				presub_replace(inst_add, reader_inst, s);
		}
	}
	return 1;
}
481 
482 /* This function assumes that inst_add->U.I.SrcReg[0] and
483  * inst_add->U.I.SrcReg[1] aren't both negative. */
presub_replace_add(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)484 static void presub_replace_add(
485 	struct rc_instruction * inst_add,
486 	struct rc_instruction * inst_reader,
487 	unsigned int src_index)
488 {
489 	rc_presubtract_op presub_opcode;
490 	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
491 		presub_opcode = RC_PRESUB_SUB;
492 	else
493 		presub_opcode = RC_PRESUB_ADD;
494 
495 	if (inst_add->U.I.SrcReg[1].Negate) {
496 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
497 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
498 	} else {
499 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
500 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
501 	}
502 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
503 	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
504 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
505 	inst_reader->U.I.SrcReg[src_index] =
506 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
507 					inst_reader->U.I.PreSub.SrcReg[0]);
508 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
509 	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
510 }
511 
is_presub_candidate(struct radeon_compiler * c,struct rc_instruction * inst)512 static int is_presub_candidate(
513 	struct radeon_compiler * c,
514 	struct rc_instruction * inst)
515 {
516 	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
517 	unsigned int i;
518 	unsigned int is_constant[2] = {0, 0};
519 
520 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
521 
522 	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
523 			|| inst->U.I.SaturateMode
524 			|| inst->U.I.WriteALUResult
525 			|| inst->U.I.Omod) {
526 		return 0;
527 	}
528 
529 	/* If both sources use a constant swizzle, then we can't convert it to
530 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
531 	 * operations neither source can contain a constant swizzle.  This
532 	 * specific case is checked in peephole_add_presub_add() when
533 	 * we make sure the swizzles for both sources are equal, so we
534 	 * don't need to worry about it here. */
535 	for (i = 0; i < 2; i++) {
536 		int chan;
537 		for (chan = 0; chan < 4; chan++) {
538 			rc_swizzle swz =
539 				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
540 			if (swz == RC_SWIZZLE_ONE
541 					|| swz == RC_SWIZZLE_ZERO
542 					|| swz == RC_SWIZZLE_HALF) {
543 				is_constant[i] = 1;
544 			}
545 		}
546 	}
547 	if (is_constant[0] && is_constant[1])
548 		return 0;
549 
550 	for(i = 0; i < info->NumSrcRegs; i++) {
551 		struct rc_src_register src = inst->U.I.SrcReg[i];
552 		if (src_reads_dst_mask(src, inst->U.I.DstReg))
553 			return 0;
554 
555 		src.File = RC_FILE_PRESUB;
556 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
557 			return 0;
558 	}
559 	return 1;
560 }
561 
peephole_add_presub_add(struct radeon_compiler * c,struct rc_instruction * inst_add)562 static int peephole_add_presub_add(
563 	struct radeon_compiler * c,
564 	struct rc_instruction * inst_add)
565 {
566 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
567         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
568         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
569 
570 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
571 		return 0;
572 
573 	/* src0 and src1 can't have absolute values */
574 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
575 	        return 0;
576 
577 	/* presub_replace_add() assumes only one is negative */
578 	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
579 	        return 0;
580 
581         /* if src0 is negative, at least all bits of dstmask have to be set */
582         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
583 	        return 0;
584 
585         /* if src1 is negative, at least all bits of dstmask have to be set */
586         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
587 	        return 0;
588 
589 	if (!is_presub_candidate(c, inst_add))
590 		return 0;
591 
592 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
593 		rc_remove_instruction(inst_add);
594 		return 1;
595 	}
596 	return 0;
597 }
598 
presub_replace_inv(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)599 static void presub_replace_inv(
600 	struct rc_instruction * inst_add,
601 	struct rc_instruction * inst_reader,
602 	unsigned int src_index)
603 {
604 	/* We must be careful not to modify inst_add, since it
605 	 * is possible it will remain part of the program.*/
606 	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
607 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
608 	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
609 	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
610 						inst_reader->U.I.PreSub.SrcReg[0]);
611 
612 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
613 	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
614 }
615 
616 /**
617  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
618  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
619  * of the add instruction must have the constatnt 1 swizzle.  This function
620  * does not check const registers to see if their value is 1.0, so it should
621  * be called after the constant_folding optimization.
622  * @return
623  * 	0 if the ADD instruction is still part of the program.
624  * 	1 if the ADD instruction is no longer part of the program.
625  */
peephole_add_presub_inv(struct radeon_compiler * c,struct rc_instruction * inst_add)626 static int peephole_add_presub_inv(
627 	struct radeon_compiler * c,
628 	struct rc_instruction * inst_add)
629 {
630 	unsigned int i, swz;
631 
632 	if (!is_presub_candidate(c, inst_add))
633 		return 0;
634 
635 	/* Check if src0 is 1. */
636 	/* XXX It would be nice to use is_src_uniform_constant here, but that
637 	 * function only works if the register's file is RC_FILE_NONE */
638 	for(i = 0; i < 4; i++ ) {
639 		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
640 		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
641 						&& swz != RC_SWIZZLE_ONE) {
642 			return 0;
643 		}
644 	}
645 
646 	/* Check src1. */
647 	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
648 						inst_add->U.I.DstReg.WriteMask
649 		|| inst_add->U.I.SrcReg[1].Abs
650 		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
651 			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
652 		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
653 
654 		return 0;
655 	}
656 
657 	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
658 		rc_remove_instruction(inst_add);
659 		return 1;
660 	}
661 	return 0;
662 }
663 
/**
 * State shared with omod_filter_reader_cb() / omod_filter_writer_cb()
 * while scanning the instructions in front of a MUL.
 */
struct peephole_mul_cb_data {
	/* Destination register of the MUL being folded. */
	struct rc_dst_register *Writer;
	/* Set to 1 once a scanned instruction touches that destination. */
	unsigned int Clobbered;
};
668 
/**
 * Read-mask callback for peephole_mul_omod(): marks the callback data
 * as clobbered when the reported read overlaps the MUL's destination
 * register, as decided by rc_src_reads_dst_mask().
 *
 * @param userdata  a struct peephole_mul_cb_data
 * @param inst      instruction performing the read (not used here)
 * @param file      register file being read
 * @param index     register index being read
 * @param mask      channel mask being read
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (rc_src_reads_dst_mask(file, mask, index,
				  dst->File, dst->Index, dst->WriteMask))
		cb->Clobbered = 1;
}
683 
/**
 * Write-mask callback for peephole_mul_omod(): marks the callback data
 * as clobbered when the reported write hits the same file and index as
 * the MUL's destination and shares at least one channel with it.
 *
 * @param userdata  a struct peephole_mul_cb_data
 * @param inst      instruction performing the write (not used here)
 * @param file      register file being written
 * @param index     register index being written
 * @param mask      channel mask being written
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (file != dst->File)
		return;
	if (index != dst->Index)
		return;
	if (mask & dst->WriteMask)
		cb->Clobbered = 1;
}
697 
peephole_mul_omod(struct radeon_compiler * c,struct rc_instruction * inst_mul,struct rc_list * var_list)698 static int peephole_mul_omod(
699 	struct radeon_compiler * c,
700 	struct rc_instruction * inst_mul,
701 	struct rc_list * var_list)
702 {
703 	unsigned int chan = 0, swz, i;
704 	int const_index = -1;
705 	int temp_index = -1;
706 	float const_value;
707 	rc_omod_op omod_op = RC_OMOD_DISABLE;
708 	struct rc_list * writer_list;
709 	struct rc_variable * var;
710 	struct peephole_mul_cb_data cb_data;
711 
712 	for (i = 0; i < 2; i++) {
713 		unsigned int j;
714 		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
715 			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
716 			return 0;
717 		}
718 		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
719 			if (temp_index != -1) {
720 				/* The instruction has two temp sources */
721 				return 0;
722 			} else {
723 				temp_index = i;
724 				continue;
725 			}
726 		}
727 		/* If we get this far Src[i] must be a constant src */
728 		if (inst_mul->U.I.SrcReg[i].Negate) {
729 			return 0;
730 		}
731 		/* The constant src needs to read from the same swizzle */
732 		swz = RC_SWIZZLE_UNUSED;
733 		chan = 0;
734 		for (j = 0; j < 4; j++) {
735 			unsigned int j_swz =
736 				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
737 			if (j_swz == RC_SWIZZLE_UNUSED) {
738 				continue;
739 			}
740 			if (swz == RC_SWIZZLE_UNUSED) {
741 				swz = j_swz;
742 				chan = j;
743 			} else if (j_swz != swz) {
744 				return 0;
745 			}
746 		}
747 
748 		if (const_index != -1) {
749 			/* The instruction has two constant sources */
750 			return 0;
751 		} else {
752 			const_index = i;
753 		}
754 	}
755 
756 	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
757 				inst_mul->U.I.SrcReg[const_index].Index)) {
758 		return 0;
759 	}
760 	const_value = rc_get_constant_value(c,
761 			inst_mul->U.I.SrcReg[const_index].Index,
762 			inst_mul->U.I.SrcReg[const_index].Swizzle,
763 			inst_mul->U.I.SrcReg[const_index].Negate,
764 			chan);
765 
766 	if (const_value == 2.0f) {
767 		omod_op = RC_OMOD_MUL_2;
768 	} else if (const_value == 4.0f) {
769 		omod_op = RC_OMOD_MUL_4;
770 	} else if (const_value == 8.0f) {
771 		omod_op = RC_OMOD_MUL_8;
772 	} else if (const_value == (1.0f / 2.0f)) {
773 		omod_op = RC_OMOD_DIV_2;
774 	} else if (const_value == (1.0f / 4.0f)) {
775 		omod_op = RC_OMOD_DIV_4;
776 	} else if (const_value == (1.0f / 8.0f)) {
777 		omod_op = RC_OMOD_DIV_8;
778 	} else {
779 		return 0;
780 	}
781 
782 	writer_list = rc_variable_list_get_writers_one_reader(var_list,
783 		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
784 
785 	if (!writer_list) {
786 		return 0;
787 	}
788 
789 	cb_data.Clobbered = 0;
790 	cb_data.Writer = &inst_mul->U.I.DstReg;
791 	for (var = writer_list->Item; var; var = var->Friend) {
792 		struct rc_instruction * inst;
793 		const struct rc_opcode_info * info = rc_get_opcode_info(
794 				var->Inst->U.I.Opcode);
795 		if (info->HasTexture) {
796 			return 0;
797 		}
798 		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
799 			return 0;
800 		}
801 		for (inst = inst_mul->Prev; inst != var->Inst;
802 							inst = inst->Prev) {
803 			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
804 								&cb_data);
805 			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
806 								&cb_data);
807 			if (cb_data.Clobbered) {
808 				break;
809 			}
810 		}
811 	}
812 
813 	if (cb_data.Clobbered) {
814 		return 0;
815 	}
816 
817 	/* Rewrite the instructions */
818 	for (var = writer_list->Item; var; var = var->Friend) {
819 		struct rc_variable * writer = writer_list->Item;
820 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
821 					writer->Inst->U.I.DstReg.WriteMask,
822 					inst_mul->U.I.DstReg.WriteMask);
823 		writer->Inst->U.I.Omod = omod_op;
824 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
825 		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
826 		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
827 		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
828 	}
829 
830 	rc_remove_instruction(inst_mul);
831 
832 	return 1;
833 }
834 
835 /**
836  * @return
837  * 	0 if inst is still part of the program.
838  * 	1 if inst is no longer part of the program.
839  */
peephole(struct radeon_compiler * c,struct rc_instruction * inst)840 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
841 {
842 	switch(inst->U.I.Opcode){
843 	case RC_OPCODE_ADD:
844 		if (c->has_presub) {
845 			if(peephole_add_presub_inv(c, inst))
846 				return 1;
847 			if(peephole_add_presub_add(c, inst))
848 				return 1;
849 		}
850 		break;
851 	default:
852 		break;
853 	}
854 	return 0;
855 }
856 
rc_optimize(struct radeon_compiler * c,void * user)857 void rc_optimize(struct radeon_compiler * c, void *user)
858 {
859 	struct rc_instruction * inst = c->Program.Instructions.Next;
860 	struct rc_list * var_list;
861 	while(inst != &c->Program.Instructions) {
862 		struct rc_instruction * cur = inst;
863 		inst = inst->Next;
864 
865 		constant_folding(c, cur);
866 
867 		if(peephole(c, cur))
868 			continue;
869 
870 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
871 			copy_propagate(c, cur);
872 			/* cur may no longer be part of the program */
873 		}
874 	}
875 
876 	if (!c->has_omod) {
877 		return;
878 	}
879 
880 	inst = c->Program.Instructions.Next;
881 	while(inst != &c->Program.Instructions) {
882 		struct rc_instruction * cur = inst;
883 		inst = inst->Next;
884 		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
885 			var_list = rc_get_variables(c);
886 			peephole_mul_omod(c, cur, var_list);
887 		}
888 	}
889 }
890