1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "radeon_dataflow.h"
30 
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
36 
37 struct src_clobbered_reads_cb_data {
38 	rc_register_file File;
39 	unsigned int Index;
40 	unsigned int Mask;
41 	struct rc_reader_data * ReaderData;
42 };
43 
/**
 * Callback used by presub_helper() to rewrite a single reader.
 * It receives the ADD being folded, the instruction reading the ADD's
 * result, and the index of the reader's source operand that refers to it.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *inst_add,
				     struct rc_instruction *inst_reader,
				     unsigned int src_index);
47 
chain_srcregs(struct rc_src_register outer,struct rc_src_register inner)48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49 {
50 	struct rc_src_register combine;
51 	combine.File = inner.File;
52 	combine.Index = inner.Index;
53 	combine.RelAddr = inner.RelAddr;
54 	if (outer.Abs) {
55 		combine.Abs = 1;
56 		combine.Negate = outer.Negate;
57 	} else {
58 		combine.Abs = inner.Abs;
59 		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60 		combine.Negate ^= outer.Negate;
61 	}
62 	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63 	return combine;
64 }
65 
copy_propagate_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67 						struct rc_src_register * src)
68 {
69 	rc_register_file file = src->File;
70 	struct rc_reader_data * reader_data = data;
71 
72 	if(!rc_inst_can_use_presub(inst,
73 				reader_data->Writer->U.I.PreSub.Opcode,
74 				rc_swizzle_to_writemask(src->Swizzle),
75 				src,
76 				&reader_data->Writer->U.I.PreSub.SrcReg[0],
77 				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78 		reader_data->Abort = 1;
79 		return;
80 	}
81 
82 	/* XXX This could probably be handled better. */
83 	if (file == RC_FILE_ADDRESS) {
84 		reader_data->Abort = 1;
85 		return;
86 	}
87 
88 	/* These instructions cannot read from the constants file.
89 	 * see radeonTransformTEX()
90 	 */
91 	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92 			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
94 				inst->U.I.Opcode == RC_OPCODE_TXB ||
95 				inst->U.I.Opcode == RC_OPCODE_TXP ||
96 				inst->U.I.Opcode == RC_OPCODE_TXD ||
97 				inst->U.I.Opcode == RC_OPCODE_TXL ||
98 				inst->U.I.Opcode == RC_OPCODE_KIL)){
99 		reader_data->Abort = 1;
100 		return;
101 	}
102 }
103 
src_clobbered_reads_cb(void * data,struct rc_instruction * inst,struct rc_src_register * src)104 static void src_clobbered_reads_cb(
105 	void * data,
106 	struct rc_instruction * inst,
107 	struct rc_src_register * src)
108 {
109 	struct src_clobbered_reads_cb_data * sc_data = data;
110 
111 	if (src->File == sc_data->File
112 	    && src->Index == sc_data->Index
113 	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114 
115 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 	}
117 
118 	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120 	}
121 }
122 
/**
 * Write callback for rc_get_readers(): for a write to (file, index, mask),
 * run src_clobbered_reads_cb() over every source of the scanned writer so
 * that a write to one of those sources is recorded on the reader scan.
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * reader_data = data;
	struct src_clobbered_reads_cb_data sc_data = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = reader_data,
	};

	rc_for_all_reads_src(reader_data->Writer,
					src_clobbered_reads_cb, &sc_data);
}
139 
copy_propagate(struct radeon_compiler * c,struct rc_instruction * inst_mov)140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141 {
142 	struct rc_reader_data reader_data;
143 	unsigned int i;
144 
145 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146 	    inst_mov->U.I.WriteALUResult)
147 		return;
148 
149 	/* Get a list of all the readers of this MOV instruction. */
150 	reader_data.ExitOnAbort = 1;
151 	rc_get_readers(c, inst_mov, &reader_data,
152 		       copy_propagate_scan_read, NULL,
153 		       is_src_clobbered_scan_write);
154 
155 	if (reader_data.Abort || reader_data.ReaderCount == 0)
156 		return;
157 
158 	/* We can propagate SaturateMode if all the readers are MOV instructions
159 	 * without a presubtract operation, source negation and absolute.
160 	 * In that case, we just move SaturateMode to all readers. */
161         if (inst_mov->U.I.SaturateMode) {
162 		for (i = 0; i < reader_data.ReaderCount; i++) {
163 			struct rc_instruction * inst = reader_data.Readers[i].Inst;
164 
165 			if (inst->U.I.Opcode != RC_OPCODE_MOV ||
166 			    inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
167 			    inst->U.I.SrcReg[0].Abs ||
168 			    inst->U.I.SrcReg[0].Negate) {
169 				return;
170 			}
171 		}
172 	}
173 
174 	/* Propagate the MOV instruction. */
175 	for (i = 0; i < reader_data.ReaderCount; i++) {
176 		struct rc_instruction * inst = reader_data.Readers[i].Inst;
177 		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
178 
179 		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
180 			inst->U.I.PreSub = inst_mov->U.I.PreSub;
181 		if (!inst->U.I.SaturateMode)
182 			inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
183 	}
184 
185 	/* Finally, remove the original MOV instruction */
186 	rc_remove_instruction(inst_mov);
187 }
188 
189 /**
190  * Check if a source register is actually always the same
191  * swizzle constant.
192  */
is_src_uniform_constant(struct rc_src_register src,rc_swizzle * pswz,unsigned int * pnegate)193 static int is_src_uniform_constant(struct rc_src_register src,
194 		rc_swizzle * pswz, unsigned int * pnegate)
195 {
196 	int have_used = 0;
197 
198 	if (src.File != RC_FILE_NONE) {
199 		*pswz = 0;
200 		return 0;
201 	}
202 
203 	for(unsigned int chan = 0; chan < 4; ++chan) {
204 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
205 		if (swz < 4) {
206 			*pswz = 0;
207 			return 0;
208 		}
209 		if (swz == RC_SWIZZLE_UNUSED)
210 			continue;
211 
212 		if (!have_used) {
213 			*pswz = swz;
214 			*pnegate = GET_BIT(src.Negate, chan);
215 			have_used = 1;
216 		} else {
217 			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
218 				*pswz = 0;
219 				return 0;
220 			}
221 		}
222 	}
223 
224 	return 1;
225 }
226 
constant_folding_mad(struct rc_instruction * inst)227 static void constant_folding_mad(struct rc_instruction * inst)
228 {
229 	rc_swizzle swz = 0;
230 	unsigned int negate= 0;
231 
232 	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
233 		if (swz == RC_SWIZZLE_ZERO) {
234 			inst->U.I.Opcode = RC_OPCODE_MUL;
235 			return;
236 		}
237 	}
238 
239 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
240 		if (swz == RC_SWIZZLE_ONE) {
241 			inst->U.I.Opcode = RC_OPCODE_ADD;
242 			if (negate)
243 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
244 			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
245 			return;
246 		} else if (swz == RC_SWIZZLE_ZERO) {
247 			inst->U.I.Opcode = RC_OPCODE_MOV;
248 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
249 			return;
250 		}
251 	}
252 
253 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
254 		if (swz == RC_SWIZZLE_ONE) {
255 			inst->U.I.Opcode = RC_OPCODE_ADD;
256 			if (negate)
257 				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
258 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
259 			return;
260 		} else if (swz == RC_SWIZZLE_ZERO) {
261 			inst->U.I.Opcode = RC_OPCODE_MOV;
262 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
263 			return;
264 		}
265 	}
266 }
267 
constant_folding_mul(struct rc_instruction * inst)268 static void constant_folding_mul(struct rc_instruction * inst)
269 {
270 	rc_swizzle swz = 0;
271 	unsigned int negate = 0;
272 
273 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
274 		if (swz == RC_SWIZZLE_ONE) {
275 			inst->U.I.Opcode = RC_OPCODE_MOV;
276 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
277 			if (negate)
278 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
279 			return;
280 		} else if (swz == RC_SWIZZLE_ZERO) {
281 			inst->U.I.Opcode = RC_OPCODE_MOV;
282 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
283 			return;
284 		}
285 	}
286 
287 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
288 		if (swz == RC_SWIZZLE_ONE) {
289 			inst->U.I.Opcode = RC_OPCODE_MOV;
290 			if (negate)
291 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
292 			return;
293 		} else if (swz == RC_SWIZZLE_ZERO) {
294 			inst->U.I.Opcode = RC_OPCODE_MOV;
295 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
296 			return;
297 		}
298 	}
299 }
300 
constant_folding_add(struct rc_instruction * inst)301 static void constant_folding_add(struct rc_instruction * inst)
302 {
303 	rc_swizzle swz = 0;
304 	unsigned int negate = 0;
305 
306 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
307 		if (swz == RC_SWIZZLE_ZERO) {
308 			inst->U.I.Opcode = RC_OPCODE_MOV;
309 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
310 			return;
311 		}
312 	}
313 
314 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
315 		if (swz == RC_SWIZZLE_ZERO) {
316 			inst->U.I.Opcode = RC_OPCODE_MOV;
317 			return;
318 		}
319 	}
320 }
321 
322 /**
323  * Replace 0.0, 1.0 and 0.5 immediate constants by their
324  * respective swizzles. Simplify instructions like ADD dst, src, 0;
325  */
constant_folding(struct radeon_compiler * c,struct rc_instruction * inst)326 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
327 {
328 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
329 	unsigned int i;
330 
331 	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
332 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
333 		struct rc_constant * constant;
334 		struct rc_src_register newsrc;
335 		int have_real_reference;
336 		unsigned int chan;
337 
338 		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
339 		for (chan = 0; chan < 4; ++chan)
340 			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
341 				break;
342 		if (chan == 4) {
343 			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
344 			continue;
345 		}
346 
347 		/* Convert immediates to swizzles. */
348 		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
349 		    inst->U.I.SrcReg[src].RelAddr ||
350 		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
351 			continue;
352 
353 		constant =
354 			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
355 
356 		if (constant->Type != RC_CONSTANT_IMMEDIATE)
357 			continue;
358 
359 		newsrc = inst->U.I.SrcReg[src];
360 		have_real_reference = 0;
361 		for (chan = 0; chan < 4; ++chan) {
362 			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
363 			unsigned int newswz;
364 			float imm;
365 			float baseimm;
366 
367 			if (swz >= 4)
368 				continue;
369 
370 			imm = constant->u.Immediate[swz];
371 			baseimm = imm;
372 			if (imm < 0.0)
373 				baseimm = -baseimm;
374 
375 			if (baseimm == 0.0) {
376 				newswz = RC_SWIZZLE_ZERO;
377 			} else if (baseimm == 1.0) {
378 				newswz = RC_SWIZZLE_ONE;
379 			} else if (baseimm == 0.5 && c->has_half_swizzles) {
380 				newswz = RC_SWIZZLE_HALF;
381 			} else {
382 				have_real_reference = 1;
383 				continue;
384 			}
385 
386 			SET_SWZ(newsrc.Swizzle, chan, newswz);
387 			if (imm < 0.0 && !newsrc.Abs)
388 				newsrc.Negate ^= 1 << chan;
389 		}
390 
391 		if (!have_real_reference) {
392 			newsrc.File = RC_FILE_NONE;
393 			newsrc.Index = 0;
394 		}
395 
396 		/* don't make the swizzle worse */
397 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
398 		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
399 			continue;
400 
401 		inst->U.I.SrcReg[src] = newsrc;
402 	}
403 
404 	/* Simplify instructions based on constants */
405 	if (inst->U.I.Opcode == RC_OPCODE_MAD)
406 		constant_folding_mad(inst);
407 
408 	/* note: MAD can simplify to MUL or ADD */
409 	if (inst->U.I.Opcode == RC_OPCODE_MUL)
410 		constant_folding_mul(inst);
411 	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
412 		constant_folding_add(inst);
413 
414 	/* In case this instruction has been converted, make sure all of the
415 	 * registers that are no longer used are empty. */
416 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
417 	for(i = opcode->NumSrcRegs; i < 3; i++) {
418 		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
419 	}
420 }
421 
422 /**
423  * If src and dst use the same register, this function returns a writemask that
424  * indicates wich components are read by src.  Otherwise zero is returned.
425  */
src_reads_dst_mask(struct rc_src_register src,struct rc_dst_register dst)426 static unsigned int src_reads_dst_mask(struct rc_src_register src,
427 						struct rc_dst_register dst)
428 {
429 	if (dst.File != src.File || dst.Index != src.Index) {
430 		return 0;
431 	}
432 	return rc_swizzle_to_writemask(src.Swizzle);
433 }
434 
435 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
436  * in any of its channels.  Return 0 otherwise. */
src_has_const_swz(struct rc_src_register src)437 static int src_has_const_swz(struct rc_src_register src) {
438 	int chan;
439 	for(chan = 0; chan < 4; chan++) {
440 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
441 		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
442 						|| swz == RC_SWIZZLE_ONE) {
443 			return 1;
444 		}
445 	}
446 	return 0;
447 }
448 
presub_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)449 static void presub_scan_read(
450 	void * data,
451 	struct rc_instruction * inst,
452 	struct rc_src_register * src)
453 {
454 	struct rc_reader_data * reader_data = data;
455 	rc_presubtract_op * presub_opcode = reader_data->CbData;
456 
457 	if (!rc_inst_can_use_presub(inst, *presub_opcode,
458 			reader_data->Writer->U.I.DstReg.WriteMask,
459 			src,
460 			&reader_data->Writer->U.I.SrcReg[0],
461 			&reader_data->Writer->U.I.SrcReg[1])) {
462 		reader_data->Abort = 1;
463 		return;
464 	}
465 }
466 
/**
 * Rewrite every reader of inst_add's result to use a presubtract operation
 * instead.
 *
 * @param c               compiler context
 * @param inst_add        the ADD whose result is being replaced
 * @param presub_opcode   presubtract operation the readers must support
 * @param presub_replace  callback that rewrites one reader operand
 * @return 1 if all readers were rewritten, 0 if the reader scan aborted or
 *         found no readers (nothing is modified in that case).
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data reader_data;
	rc_presubtract_op cb_op = presub_opcode;
	unsigned int n;

	reader_data.CbData = &cb_op;
	reader_data.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
						is_src_clobbered_scan_write);

	if (reader_data.Abort)
		return 0;
	if (reader_data.ReaderCount == 0)
		return 0;

	for (n = 0; n < reader_data.ReaderCount; n++) {
		struct rc_instruction * user = reader_data.Readers[n].Inst;
		struct rc_src_register * user_src = reader_data.Readers[n].U.I.Src;
		const struct rc_opcode_info * info =
				rc_get_opcode_info(user->U.I.Opcode);
		unsigned int slot;

		/* The reader entry only records a pointer to the operand;
		 * recover which operand slot that pointer belongs to. */
		for (slot = 0; slot < info->NumSrcRegs; slot++) {
			if (&user->U.I.SrcReg[slot] == user_src)
				presub_replace(inst_add, user, slot);
		}
	}
	return 1;
}
498 
499 /* This function assumes that inst_add->U.I.SrcReg[0] and
500  * inst_add->U.I.SrcReg[1] aren't both negative. */
presub_replace_add(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)501 static void presub_replace_add(
502 	struct rc_instruction * inst_add,
503 	struct rc_instruction * inst_reader,
504 	unsigned int src_index)
505 {
506 	rc_presubtract_op presub_opcode;
507 	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
508 		presub_opcode = RC_PRESUB_SUB;
509 	else
510 		presub_opcode = RC_PRESUB_ADD;
511 
512 	if (inst_add->U.I.SrcReg[1].Negate) {
513 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
514 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
515 	} else {
516 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
517 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
518 	}
519 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
520 	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
521 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
522 	inst_reader->U.I.SrcReg[src_index] =
523 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
524 					inst_reader->U.I.PreSub.SrcReg[0]);
525 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
526 	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
527 }
528 
is_presub_candidate(struct radeon_compiler * c,struct rc_instruction * inst)529 static int is_presub_candidate(
530 	struct radeon_compiler * c,
531 	struct rc_instruction * inst)
532 {
533 	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
534 	unsigned int i;
535 	unsigned int is_constant[2] = {0, 0};
536 
537 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
538 
539 	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
540 			|| inst->U.I.SaturateMode
541 			|| inst->U.I.WriteALUResult
542 			|| inst->U.I.Omod) {
543 		return 0;
544 	}
545 
546 	/* If both sources use a constant swizzle, then we can't convert it to
547 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
548 	 * operations neither source can contain a constant swizzle.  This
549 	 * specific case is checked in peephole_add_presub_add() when
550 	 * we make sure the swizzles for both sources are equal, so we
551 	 * don't need to worry about it here. */
552 	for (i = 0; i < 2; i++) {
553 		int chan;
554 		for (chan = 0; chan < 4; chan++) {
555 			rc_swizzle swz =
556 				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
557 			if (swz == RC_SWIZZLE_ONE
558 					|| swz == RC_SWIZZLE_ZERO
559 					|| swz == RC_SWIZZLE_HALF) {
560 				is_constant[i] = 1;
561 			}
562 		}
563 	}
564 	if (is_constant[0] && is_constant[1])
565 		return 0;
566 
567 	for(i = 0; i < info->NumSrcRegs; i++) {
568 		struct rc_src_register src = inst->U.I.SrcReg[i];
569 		if (src_reads_dst_mask(src, inst->U.I.DstReg))
570 			return 0;
571 
572 		src.File = RC_FILE_PRESUB;
573 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
574 			return 0;
575 	}
576 	return 1;
577 }
578 
peephole_add_presub_add(struct radeon_compiler * c,struct rc_instruction * inst_add)579 static int peephole_add_presub_add(
580 	struct radeon_compiler * c,
581 	struct rc_instruction * inst_add)
582 {
583 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
584         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
585         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
586 
587 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
588 		return 0;
589 
590 	/* src0 and src1 can't have absolute values */
591 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
592 	        return 0;
593 
594 	/* presub_replace_add() assumes only one is negative */
595 	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
596 	        return 0;
597 
598         /* if src0 is negative, at least all bits of dstmask have to be set */
599         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
600 	        return 0;
601 
602         /* if src1 is negative, at least all bits of dstmask have to be set */
603         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
604 	        return 0;
605 
606 	if (!is_presub_candidate(c, inst_add))
607 		return 0;
608 
609 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
610 		rc_remove_instruction(inst_add);
611 		return 1;
612 	}
613 	return 0;
614 }
615 
presub_replace_inv(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)616 static void presub_replace_inv(
617 	struct rc_instruction * inst_add,
618 	struct rc_instruction * inst_reader,
619 	unsigned int src_index)
620 {
621 	/* We must be careful not to modify inst_add, since it
622 	 * is possible it will remain part of the program.*/
623 	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
624 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
625 	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
626 	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
627 						inst_reader->U.I.PreSub.SrcReg[0]);
628 
629 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
630 	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
631 }
632 
633 /**
634  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
636  * of the add instruction must have the constatnt 1 swizzle.  This function
637  * does not check const registers to see if their value is 1.0, so it should
638  * be called after the constant_folding optimization.
639  * @return
640  * 	0 if the ADD instruction is still part of the program.
641  * 	1 if the ADD instruction is no longer part of the program.
642  */
peephole_add_presub_inv(struct radeon_compiler * c,struct rc_instruction * inst_add)643 static int peephole_add_presub_inv(
644 	struct radeon_compiler * c,
645 	struct rc_instruction * inst_add)
646 {
647 	unsigned int i, swz;
648 
649 	if (!is_presub_candidate(c, inst_add))
650 		return 0;
651 
652 	/* Check if src0 is 1. */
653 	/* XXX It would be nice to use is_src_uniform_constant here, but that
654 	 * function only works if the register's file is RC_FILE_NONE */
655 	for(i = 0; i < 4; i++ ) {
656 		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
657 		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
658 						&& swz != RC_SWIZZLE_ONE) {
659 			return 0;
660 		}
661 	}
662 
663 	/* Check src1. */
664 	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
665 						inst_add->U.I.DstReg.WriteMask
666 		|| inst_add->U.I.SrcReg[1].Abs
667 		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
668 			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
669 		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
670 
671 		return 0;
672 	}
673 
674 	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
675 		rc_remove_instruction(inst_add);
676 		return 1;
677 	}
678 	return 0;
679 }
680 
/**
 * State shared by the omod_filter_*_cb() callbacks while
 * peephole_mul_omod() scans the instructions between a writer and the MUL.
 */
struct peephole_mul_cb_data {
	/* Destination register of the MUL that is being folded away. */
	struct rc_dst_register *Writer;
	/* Set to 1 by the callbacks once that register is found in use. */
	unsigned Clobbered;
};
685 
/**
 * Read callback for peephole_mul_omod(): flags the scan as clobbered when
 * rc_src_reads_dst_mask() reports that the read (file, index, mask) overlaps
 * the destination register stored in the callback data.
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * d = userdata;
	const struct rc_dst_register * dst = d->Writer;

	if (!rc_src_reads_dst_mask(file, mask, index,
				   dst->File, dst->Index, dst->WriteMask))
		return;

	d->Clobbered = 1;
}
700 
/**
 * Write callback for peephole_mul_omod(): flags the scan as clobbered when
 * the write (file, index, mask) touches any channel of the destination
 * register stored in the callback data.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * d = userdata;
	const struct rc_dst_register * dst = d->Writer;

	if (file != dst->File || index != dst->Index)
		return;

	if (mask & dst->WriteMask)
		d->Clobbered = 1;
}
714 
peephole_mul_omod(struct radeon_compiler * c,struct rc_instruction * inst_mul,struct rc_list * var_list)715 static int peephole_mul_omod(
716 	struct radeon_compiler * c,
717 	struct rc_instruction * inst_mul,
718 	struct rc_list * var_list)
719 {
720 	unsigned int chan = 0, swz, i;
721 	int const_index = -1;
722 	int temp_index = -1;
723 	float const_value;
724 	rc_omod_op omod_op = RC_OMOD_DISABLE;
725 	struct rc_list * writer_list;
726 	struct rc_variable * var;
727 	struct peephole_mul_cb_data cb_data;
728 	unsigned writemask_sum;
729 
730 	for (i = 0; i < 2; i++) {
731 		unsigned int j;
732 		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
733 			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
734 			return 0;
735 		}
736 		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
737 			if (temp_index != -1) {
738 				/* The instruction has two temp sources */
739 				return 0;
740 			} else {
741 				temp_index = i;
742 				continue;
743 			}
744 		}
745 		/* If we get this far Src[i] must be a constant src */
746 		if (inst_mul->U.I.SrcReg[i].Negate) {
747 			return 0;
748 		}
749 		/* The constant src needs to read from the same swizzle */
750 		swz = RC_SWIZZLE_UNUSED;
751 		chan = 0;
752 		for (j = 0; j < 4; j++) {
753 			unsigned int j_swz =
754 				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
755 			if (j_swz == RC_SWIZZLE_UNUSED) {
756 				continue;
757 			}
758 			if (swz == RC_SWIZZLE_UNUSED) {
759 				swz = j_swz;
760 				chan = j;
761 			} else if (j_swz != swz) {
762 				return 0;
763 			}
764 		}
765 
766 		if (const_index != -1) {
767 			/* The instruction has two constant sources */
768 			return 0;
769 		} else {
770 			const_index = i;
771 		}
772 	}
773 
774 	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
775 				inst_mul->U.I.SrcReg[const_index].Index)) {
776 		return 0;
777 	}
778 	const_value = rc_get_constant_value(c,
779 			inst_mul->U.I.SrcReg[const_index].Index,
780 			inst_mul->U.I.SrcReg[const_index].Swizzle,
781 			inst_mul->U.I.SrcReg[const_index].Negate,
782 			chan);
783 
784 	if (const_value == 2.0f) {
785 		omod_op = RC_OMOD_MUL_2;
786 	} else if (const_value == 4.0f) {
787 		omod_op = RC_OMOD_MUL_4;
788 	} else if (const_value == 8.0f) {
789 		omod_op = RC_OMOD_MUL_8;
790 	} else if (const_value == (1.0f / 2.0f)) {
791 		omod_op = RC_OMOD_DIV_2;
792 	} else if (const_value == (1.0f / 4.0f)) {
793 		omod_op = RC_OMOD_DIV_4;
794 	} else if (const_value == (1.0f / 8.0f)) {
795 		omod_op = RC_OMOD_DIV_8;
796 	} else {
797 		return 0;
798 	}
799 
800 	writer_list = rc_variable_list_get_writers_one_reader(var_list,
801 		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
802 
803 	if (!writer_list) {
804 		return 0;
805 	}
806 
807 	cb_data.Clobbered = 0;
808 	cb_data.Writer = &inst_mul->U.I.DstReg;
809 	for (var = writer_list->Item; var; var = var->Friend) {
810 		struct rc_instruction * inst;
811 		const struct rc_opcode_info * info = rc_get_opcode_info(
812 				var->Inst->U.I.Opcode);
813 		if (info->HasTexture) {
814 			return 0;
815 		}
816 		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
817 			return 0;
818 		}
819 		for (inst = inst_mul->Prev; inst != var->Inst;
820 							inst = inst->Prev) {
821 			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
822 								&cb_data);
823 			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
824 								&cb_data);
825 			if (cb_data.Clobbered) {
826 				break;
827 			}
828 		}
829 	}
830 
831 	if (cb_data.Clobbered) {
832 		return 0;
833 	}
834 
835 	/* Rewrite the instructions */
836 	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
837 	for (var = writer_list->Item; var; var = var->Friend) {
838 		struct rc_variable * writer = var;
839 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
840 					writemask_sum,
841 					inst_mul->U.I.DstReg.WriteMask);
842 		writer->Inst->U.I.Omod = omod_op;
843 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
844 		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
845 		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
846 		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
847 	}
848 
849 	rc_remove_instruction(inst_mul);
850 
851 	return 1;
852 }
853 
854 /**
855  * @return
856  * 	0 if inst is still part of the program.
857  * 	1 if inst is no longer part of the program.
858  */
peephole(struct radeon_compiler * c,struct rc_instruction * inst)859 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
860 {
861 	switch(inst->U.I.Opcode){
862 	case RC_OPCODE_ADD:
863 		if (c->has_presub) {
864 			if(peephole_add_presub_inv(c, inst))
865 				return 1;
866 			if(peephole_add_presub_add(c, inst))
867 				return 1;
868 		}
869 		break;
870 	default:
871 		break;
872 	}
873 	return 0;
874 }
875 
rc_optimize(struct radeon_compiler * c,void * user)876 void rc_optimize(struct radeon_compiler * c, void *user)
877 {
878 	struct rc_instruction * inst = c->Program.Instructions.Next;
879 	struct rc_list * var_list;
880 	while(inst != &c->Program.Instructions) {
881 		struct rc_instruction * cur = inst;
882 		inst = inst->Next;
883 
884 		constant_folding(c, cur);
885 
886 		if(peephole(c, cur))
887 			continue;
888 
889 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
890 			copy_propagate(c, cur);
891 			/* cur may no longer be part of the program */
892 		}
893 	}
894 
895 	if (!c->has_omod) {
896 		return;
897 	}
898 
899 	inst = c->Program.Instructions.Next;
900 	while(inst != &c->Program.Instructions) {
901 		struct rc_instruction * cur = inst;
902 		inst = inst->Next;
903 		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
904 			var_list = rc_get_variables(c);
905 			peephole_mul_omod(c, cur, var_list);
906 		}
907 	}
908 }
909