1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2012 Advanced Micro Devices, Inc.
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  * Authors:
28  * Nicolai Haehnle
29  * Tom Stellard <thomas.stellard@amd.com>
30  */
31 
32 #include "radeon_dataflow.h"
33 
34 #include "radeon_code.h"
35 #include "radeon_compiler.h"
36 #include "radeon_compiler_util.h"
37 #include "radeon_swizzle.h"
38 
39 
rewrite_source(struct radeon_compiler * c,struct rc_instruction * inst,unsigned src)40 static void rewrite_source(struct radeon_compiler * c,
41 		struct rc_instruction * inst, unsigned src)
42 {
43 	struct rc_swizzle_split split;
44 	unsigned int tempreg = rc_find_free_temporary(c);
45 	unsigned int usemask;
46 
47 	usemask = 0;
48 	for(unsigned int chan = 0; chan < 4; ++chan) {
49 		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
50 			usemask |= 1 << chan;
51 	}
52 
53 	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
54 
55 	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
56 		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
57 		unsigned int phase_refmask;
58 		unsigned int masked_negate;
59 
60 		mov->U.I.Opcode = RC_OPCODE_MOV;
61 		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
62 		mov->U.I.DstReg.Index = tempreg;
63 		mov->U.I.DstReg.WriteMask = split.Phase[phase];
64 		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
65 		mov->U.I.PreSub = inst->U.I.PreSub;
66 
67 		phase_refmask = 0;
68 		for(unsigned int chan = 0; chan < 4; ++chan) {
69 			if (!GET_BIT(split.Phase[phase], chan))
70 				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
71 			else
72 				phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
73 		}
74 
75 		phase_refmask &= RC_MASK_XYZW;
76 
77 		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
78 		if (masked_negate == 0)
79 			mov->U.I.SrcReg[0].Negate = 0;
80 		else if (masked_negate == split.Phase[phase])
81 			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
82 
83 	}
84 
85 	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
86 	inst->U.I.SrcReg[src].Index = tempreg;
87 	inst->U.I.SrcReg[src].Swizzle = 0;
88 	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
89 	inst->U.I.SrcReg[src].Abs = 0;
90 	for(unsigned int chan = 0; chan < 4; ++chan) {
91 		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
92 				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
93 	}
94 }
95 
96 /**
97  * This function will attempt to rewrite non-native swizzles that read from
98  * immediate registers by rearranging the immediates to allow the
99  * instruction to use native swizzles.
100  */
try_rewrite_constant(struct radeon_compiler * c,struct rc_src_register * reg)101 static unsigned try_rewrite_constant(struct radeon_compiler *c,
102 					struct rc_src_register *reg)
103 {
104 	unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
105 	unsigned all_inline = 0;
106 	float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
107 
108 	if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
109 		/* The register does not contain immediates, but if all
110 		 * the swizzles are inline constants, we can still rewrite
111 		 * it. */
112 
113 		new_swizzle = RC_SWIZZLE_XYZW;
114 		for (chan = 0 ; chan < 4; chan++) {
115 			unsigned swz = GET_SWZ(reg->Swizzle, chan);
116 			if (swz <= RC_SWIZZLE_W) {
117 				return 0;
118 			}
119 			if (swz == RC_SWIZZLE_UNUSED) {
120 				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
121 			}
122 		}
123 		all_inline = 1;
124 	} else {
125 		new_swizzle = reg->Swizzle;
126 	}
127 
128 	swz = RC_SWIZZLE_UNUSED;
129 	found_swizzle = 1;
130 	/* Check if all channels have the same swizzle.  If they do we can skip
131 	 * the search for a native swizzle.  We only need to check the first
132 	 * three channels, because any swizzle is legal in the fourth channel.
133 	 */
134 	for (chan = 0; chan < 3; chan++) {
135 		unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
136 		if (chan_swz == RC_SWIZZLE_UNUSED) {
137 			continue;
138 		}
139 		if (swz == RC_SWIZZLE_UNUSED) {
140 			swz = chan_swz;
141 		} else if (swz != chan_swz) {
142 			found_swizzle = 0;
143 			break;
144 		}
145 	}
146 
147 	/* Find a legal swizzle */
148 
149 	/* This loop attempts to find a native swizzle where all the
150 	 * channels are different. */
151 	while (!found_swizzle && !all_inline) {
152 		swz0 = GET_SWZ(new_swizzle, 0);
153 		swz1 = GET_SWZ(new_swizzle, 1);
154 		swz2 = GET_SWZ(new_swizzle, 2);
155 
156 		/* Swizzle .W. is never legal. */
157 		if (swz1 == RC_SWIZZLE_W ||
158 			swz1 == RC_SWIZZLE_UNUSED ||
159 			swz1 == RC_SWIZZLE_ZERO ||
160 			swz1 == RC_SWIZZLE_HALF ||
161 			swz1 == RC_SWIZZLE_ONE) {
162 			/* We chose Z, because there are two non-repeating
163 			 * swizzle combinations of the form .Z. There are
164 			 * only one combination each for .X. and .Y. */
165 			SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
166 			continue;
167 		}
168 
169 		if (swz2 == RC_SWIZZLE_UNUSED) {
170 			/* We choose Y, because there are two non-repeating
171 			 * swizzle combinations of the form ..Y */
172 			SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
173 			continue;
174 		}
175 
176 		switch (swz0) {
177 		/* X.. */
178 		case RC_SWIZZLE_X:
179 			/* Legal swizzles that start with X: XYZ, XXX */
180 			switch (swz1) {
181 			/* XX. */
182 			case RC_SWIZZLE_X:
183 				/*  The new swizzle will be:
184 				 *  ZXY (XX. => ZX. => ZXY) */
185 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
186 				break;
187 			/* XY. */
188 			case RC_SWIZZLE_Y:
189 				/* The new swizzle is XYZ */
190 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
191 				found_swizzle = 1;
192 				break;
193 			/* XZ. */
194 			case RC_SWIZZLE_Z:
195 				/* XZZ */
196 				if (swz2 == RC_SWIZZLE_Z) {
197 					/* The new swizzle is XYZ */
198 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
199 					found_swizzle = 1;
200 				} else { /* XZ[^Z] */
201 					/* The new swizzle will be:
202 					 * YZX (XZ. => YZ. => YZX) */
203 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
204 				}
205 				break;
206 			/* XW. Should have already been handled. */
207 			case RC_SWIZZLE_W:
208 				assert(0);
209 				break;
210 			}
211 			break;
212 		/* Y.. */
213 		case RC_SWIZZLE_Y:
214 			/* Legal swizzles that start with Y: YYY, YZX */
215 			switch (swz1) {
216 			/* YY. */
217 			case RC_SWIZZLE_Y:
218 				/* The new swizzle will be:
219 				 * XYZ (YY. => XY. => XYZ) */
220 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
221 				break;
222 			/* YZ. */
223 			case RC_SWIZZLE_Z:
224 				/* The new swizzle is YZX */
225 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
226 				found_swizzle = 1;
227 				break;
228 			/* YX. */
229 			case RC_SWIZZLE_X:
230 				/* YXX */
231 				if (swz2 == RC_SWIZZLE_X) {
232 					/*The new swizzle is YZX */
233 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
234 					found_swizzle = 1;
235 				} else { /* YX[^X] */
236 					/* The new swizzle will be:
237 					 * ZXY (YX. => ZX. -> ZXY) */
238 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
239 				}
240 				break;
241 			/* YW. Should have already been handled. */
242 			case RC_SWIZZLE_W:
243 				assert(0);
244 				break;
245 			}
246 			break;
247 		/* Z.. */
248 		case RC_SWIZZLE_Z:
249 			/* Legal swizzles that start with Z: ZZZ, ZXY */
250 			switch (swz1) {
251 			/* ZZ. */
252 			case RC_SWIZZLE_Z:
253 				/* The new swizzle will be:
254 				 * WZY (ZZ. => WZ. => WZY) */
255 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
256 				break;
257 			/* ZX. */
258 			case RC_SWIZZLE_X:
259 				/* The new swizzle is ZXY */
260 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
261 				found_swizzle = 1;
262 				break;
263 			/* ZY. */
264 			case RC_SWIZZLE_Y:
265 				/* ZYY */
266 				if (swz2 == RC_SWIZZLE_Y) {
267 					/* The new swizzle is ZXY */
268 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
269 					found_swizzle = 1;
270 				} else { /* ZY[^Y] */
271 					/* The new swizzle will be:
272 					 * XYZ (ZY. => XY. => XYZ) */
273 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
274 				}
275 				break;
276 			/* ZW. Should have already been handled. */
277 			case RC_SWIZZLE_W:
278 				assert(0);
279 				break;
280 			}
281 			break;
282 
283 		/* W.. */
284 		case RC_SWIZZLE_W:
285 			/* Legal swizzles that start with X: WWW, WZY */
286 			switch (swz1) {
287 			/* WW. Should have already been handled. */
288 			case RC_SWIZZLE_W:
289 				assert(0);
290 				break;
291 			/* WZ. */
292 			case RC_SWIZZLE_Z:
293 				/* The new swizzle will be WZY */
294 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
295 				found_swizzle = 1;
296 				break;
297 			/* WX. */
298 			case RC_SWIZZLE_X:
299 			/* WY. */
300 			case RC_SWIZZLE_Y:
301 				/* W[XY]Y */
302 				if (swz2 == RC_SWIZZLE_Y) {
303 					/* The new swizzle will be WZY */
304 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
305 					found_swizzle = 1;
306 				} else { /* W[XY][^Y] */
307 					/* The new swizzle will be:
308 					 * ZXY (WX. => XX. => ZX. => ZXY) or
309 					 * XYZ (WY. => XY. => XYZ)
310 					 */
311 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
312 				}
313 				break;
314 			}
315 			break;
316 		/* U.. 0.. 1.. H..*/
317 		case RC_SWIZZLE_UNUSED:
318 		case RC_SWIZZLE_ZERO:
319 		case RC_SWIZZLE_ONE:
320 		case RC_SWIZZLE_HALF:
321 			SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
322 			break;
323 		}
324 	}
325 
326 	/* Handle the swizzle in the w channel. */
327 	swz3 = GET_SWZ(reg->Swizzle, 3);
328 
329 	/* We can skip this if the swizzle in channel w is an inline constant. */
330 	if (swz3 <= RC_SWIZZLE_W) {
331 		for (chan = 0; chan < 3; chan++) {
332 			unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
333 			unsigned new_swz = GET_SWZ(new_swizzle, chan);
334 			/* If the swizzle in the w channel is the same as the
335 			 * swizzle in any other channels, we need to rewrite it.
336 			 * For example:
337 			 * reg->Swizzle == XWZW
338 			 * new_swizzle  == XYZX
339 			 * Since the swizzle in the y channel is being
340 			 * rewritten from W -> Y we need to change the swizzle
341 			 * in the w channel from W -> Y as well.
342 			 */
343 			if (old_swz == swz3) {
344 				SET_SWZ(new_swizzle, 3,
345 						GET_SWZ(new_swizzle, chan));
346 				break;
347 			}
348 
349 			/* The swizzle in channel w will be overwritten by one
350 			 * of the new swizzles. */
351 			if (new_swz == swz3) {
352 				/* Find an unused swizzle */
353 				unsigned i;
354 				unsigned used = 0;
355 				for (i = 0; i < 3; i++) {
356 					used |= 1 << GET_SWZ(new_swizzle, i);
357 				}
358 				for (i = 0; i < 4; i++) {
359 					if (used & (1 << i)) {
360 						continue;
361 					}
362 					SET_SWZ(new_swizzle, 3, i);
363 				}
364 			}
365 		}
366 	}
367 
368 	for (chan = 0; chan < 4; chan++) {
369 		unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
370 		unsigned new_swz = GET_SWZ(new_swizzle, chan);
371 
372 		if (old_swz == RC_SWIZZLE_UNUSED) {
373 			continue;
374 		}
375 
376 		/* We don't need to change the swizzle in channel w if it is
377 		 * an inline constant.  These are always legal in the w channel.
378 		 *
379 		 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
380 		 */
381 		if (chan == 3 && old_swz > RC_SWIZZLE_W) {
382 			continue;
383 		}
384 
385 		assert(new_swz <= RC_SWIZZLE_W);
386 
387 		switch (old_swz) {
388 		case RC_SWIZZLE_ZERO:
389 			imms[new_swz] = 0.0f;
390 			break;
391 		case RC_SWIZZLE_HALF:
392 			if (reg->Negate & (1 << chan)) {
393 				imms[new_swz] = -0.5f;
394 			} else {
395 				imms[new_swz] = 0.5f;
396 			}
397 			break;
398 		case RC_SWIZZLE_ONE:
399 			if (reg->Negate & (1 << chan)) {
400 				imms[new_swz] = -1.0f;
401 			} else {
402 				imms[new_swz] = 1.0f;
403 			}
404 			break;
405 		default:
406 			imms[new_swz] = rc_get_constant_value(c, reg->Index,
407 					reg->Swizzle, reg->Negate, chan);
408 		}
409 		SET_SWZ(reg->Swizzle, chan, new_swz);
410 	}
411 	reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
412 							imms);
413 	/* We need to set the register file to CONSTANT in case we are
414 	 * converting a non-constant register with constant swizzles (e.g.
415 	 * ONE, ZERO, HALF).
416 	 */
417 	reg->File = RC_FILE_CONSTANT;
418 	reg->Negate = 0;
419 	return 1;
420 }
421 
rc_dataflow_swizzles(struct radeon_compiler * c,void * user)422 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
423 {
424 	struct rc_instruction * inst;
425 
426 	for(inst = c->Program.Instructions.Next;
427 					inst != &c->Program.Instructions;
428 					inst = inst->Next) {
429 		const struct rc_opcode_info * opcode =
430 					rc_get_opcode_info(inst->U.I.Opcode);
431 		unsigned int src;
432 
433 		for(src = 0; src < opcode->NumSrcRegs; ++src) {
434 			struct rc_src_register *reg = &inst->U.I.SrcReg[src];
435 			if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
436 				continue;
437 			}
438 			if (!c->is_r500 &&
439 			    c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
440 			    try_rewrite_constant(c, reg)) {
441 				continue;
442 			}
443 			rewrite_source(c, inst, src);
444 		}
445 	}
446 	if (c->Debug & RC_DBG_LOG)
447 		rc_constants_print(&c->Program.Constants);
448 }
449