1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27 
28 #include "radeon_program_pair.h"
29 
30 #include "radeon_compiler.h"
31 #include "radeon_compiler_util.h"
32 
33 
34 /**
35  * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
36  * and reverse the order of arguments for CMP.
37  */
final_rewrite(struct rc_sub_instruction * inst)38 static void final_rewrite(struct rc_sub_instruction *inst)
39 {
40 	struct rc_src_register tmp;
41 
42 	switch(inst->Opcode) {
43 	case RC_OPCODE_ADD:
44 		inst->SrcReg[2] = inst->SrcReg[1];
45 		inst->SrcReg[1].File = RC_FILE_NONE;
46 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
47 		inst->SrcReg[1].Negate = RC_MASK_NONE;
48 		inst->Opcode = RC_OPCODE_MAD;
49 		break;
50 	case RC_OPCODE_CMP:
51 		tmp = inst->SrcReg[2];
52 		inst->SrcReg[2] = inst->SrcReg[0];
53 		inst->SrcReg[0] = tmp;
54 		break;
55 	case RC_OPCODE_MOV:
56 		/* AMD say we should use CMP.
57 		 * However, when we transform
58 		 *  KIL -r0;
59 		 * into
60 		 *  CMP tmp, -r0, -r0, 0;
61 		 *  KIL tmp;
62 		 * we get incorrect behaviour on R500 when r0 == 0.0.
63 		 * It appears that the R500 KIL hardware treats -0.0 as less
64 		 * than zero.
65 		 */
66 		inst->SrcReg[1].File = RC_FILE_NONE;
67 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
68 		inst->SrcReg[2].File = RC_FILE_NONE;
69 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
70 		inst->Opcode = RC_OPCODE_MAD;
71 		break;
72 	case RC_OPCODE_MUL:
73 		inst->SrcReg[2].File = RC_FILE_NONE;
74 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
75 		inst->Opcode = RC_OPCODE_MAD;
76 		break;
77 	default:
78 		/* nothing to do */
79 		break;
80 	}
81 }
82 
83 
84 /**
85  * Classify an instruction according to which ALUs etc. it needs
86  */
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)87 static void classify_instruction(struct rc_sub_instruction * inst,
88 	int * needrgb, int * needalpha, int * istranscendent)
89 {
90 	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
91 	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
92 	*istranscendent = 0;
93 
94 	if (inst->WriteALUResult == RC_ALURESULT_X)
95 		*needrgb = 1;
96 	else if (inst->WriteALUResult == RC_ALURESULT_W)
97 		*needalpha = 1;
98 
99 	switch(inst->Opcode) {
100 	case RC_OPCODE_ADD:
101 	case RC_OPCODE_CMP:
102 	case RC_OPCODE_CND:
103 	case RC_OPCODE_DDX:
104 	case RC_OPCODE_DDY:
105 	case RC_OPCODE_FRC:
106 	case RC_OPCODE_MAD:
107 	case RC_OPCODE_MAX:
108 	case RC_OPCODE_MIN:
109 	case RC_OPCODE_MOV:
110 	case RC_OPCODE_MUL:
111 		break;
112 	case RC_OPCODE_COS:
113 	case RC_OPCODE_EX2:
114 	case RC_OPCODE_LG2:
115 	case RC_OPCODE_RCP:
116 	case RC_OPCODE_RSQ:
117 	case RC_OPCODE_SIN:
118 		*istranscendent = 1;
119 		*needalpha = 1;
120 		break;
121 	case RC_OPCODE_DP4:
122 		*needalpha = 1;
123 		/* fall through */
124 	case RC_OPCODE_DP3:
125 		*needrgb = 1;
126 		break;
127 	default:
128 		break;
129 	}
130 }
131 
/**
 * Record which channel groups the swizzle of a source operand selects.
 *
 * All four swizzle slots are inspected.  *rgb is set to 1 if any slot holds
 * a selector below 3, *alpha is set to 1 if any slot holds selector 3.
 * Other selector values are ignored.  The flags are only ever set, never
 * cleared, so callers are expected to initialise them.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
							unsigned int * alpha)
{
	for (unsigned int chan = 0; chan < 4; chan++) {
		switch (GET_SWZ(src.Swizzle, chan)) {
		case 0:
		case 1:
		case 2:
			*rgb = 1;
			break;
		case 3:
			*alpha = 1;
			break;
		default:
			break;
		}
	}
}
144 
145 /**
146  * Fill the given ALU instruction's opcodes and source operands into the given pair,
147  * if possible.
148  */
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)149 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
150 	struct rc_pair_instruction * pair,
151 	struct rc_sub_instruction * inst)
152 {
153 	int needrgb, needalpha, istranscendent;
154 	const struct rc_opcode_info * opcode;
155 	int i;
156 
157 	memset(pair, 0, sizeof(struct rc_pair_instruction));
158 
159 	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
160 
161 	if (needrgb) {
162 		if (istranscendent)
163 			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
164 		else
165 			pair->RGB.Opcode = inst->Opcode;
166 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
167 			pair->RGB.Saturate = 1;
168 	}
169 	if (needalpha) {
170 		pair->Alpha.Opcode = inst->Opcode;
171 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
172 			pair->Alpha.Saturate = 1;
173 	}
174 
175 	opcode = rc_get_opcode_info(inst->Opcode);
176 
177 	/* Presubtract handling:
178 	 * We need to make sure that the values used by the presubtract
179 	 * operation end up in src0 or src1. */
180 	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
181 		/* rc_pair_alloc_source() will fill in data for
182 		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
183 		int j;
184 		for(j = 0; j < 3; j++) {
185 			int src_regs;
186 			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
187 				continue;
188 
189 			src_regs = rc_presubtract_src_reg_count(
190 							inst->PreSub.Opcode);
191 			for(i = 0; i < src_regs; i++) {
192 				unsigned int rgb = 0;
193 				unsigned int alpha = 0;
194 				src_uses(inst->SrcReg[j], &rgb, &alpha);
195 				if(rgb) {
196 					pair->RGB.Src[i].File =
197 						inst->PreSub.SrcReg[i].File;
198 					pair->RGB.Src[i].Index =
199 						inst->PreSub.SrcReg[i].Index;
200 					pair->RGB.Src[i].Used = 1;
201 				}
202 				if(alpha) {
203 					pair->Alpha.Src[i].File =
204 						inst->PreSub.SrcReg[i].File;
205 					pair->Alpha.Src[i].Index =
206 						inst->PreSub.SrcReg[i].Index;
207 					pair->Alpha.Src[i].Used = 1;
208 				}
209 			}
210 		}
211 	}
212 
213 	for(i = 0; i < opcode->NumSrcRegs; ++i) {
214 		int source;
215 		if (needrgb && !istranscendent) {
216 			unsigned int srcrgb = 0;
217 			unsigned int srcalpha = 0;
218 			unsigned int srcmask = 0;
219 			int j;
220 			/* We don't care about the alpha channel here.  We only
221 			 * want the part of the swizzle that writes to rgb,
222 			 * since we are creating an rgb instruction. */
223 			for(j = 0; j < 3; ++j) {
224 				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
225 
226 				if (swz < RC_SWIZZLE_W)
227 					srcrgb = 1;
228 				else if (swz == RC_SWIZZLE_W)
229 					srcalpha = 1;
230 
231 				if (swz < RC_SWIZZLE_UNUSED)
232 					srcmask |= 1 << j;
233 			}
234 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
235 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
236 			if (source < 0) {
237 				rc_error(&c->Base, "Failed to translate "
238 							"rgb instruction.\n");
239 				return;
240 			}
241 			pair->RGB.Arg[i].Source = source;
242 			pair->RGB.Arg[i].Swizzle =
243 				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
244 			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
245 			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
246 		}
247 		if (needalpha) {
248 			unsigned int srcrgb = 0;
249 			unsigned int srcalpha = 0;
250 			unsigned int swz;
251 			if (istranscendent) {
252 				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
253 			} else {
254 				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
255 			}
256 
257 			if (swz < 3)
258 				srcrgb = 1;
259 			else if (swz < 4)
260 				srcalpha = 1;
261 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
262 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
263 			if (source < 0) {
264 				rc_error(&c->Base, "Failed to translate "
265 							"alpha instruction.\n");
266 				return;
267 			}
268 			pair->Alpha.Arg[i].Source = source;
269 			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
270 			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
271 
272 			if (istranscendent) {
273 				pair->Alpha.Arg[i].Negate =
274 					!!(inst->SrcReg[i].Negate &
275 							inst->DstReg.WriteMask);
276 			} else {
277 				pair->Alpha.Arg[i].Negate =
278 					!!(inst->SrcReg[i].Negate & RC_MASK_W);
279 			}
280 		}
281 	}
282 
283 	/* Destination handling */
284 	if (inst->DstReg.File == RC_FILE_OUTPUT) {
285         if (inst->DstReg.Index == c->OutputDepth) {
286             pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
287         } else {
288             for (i = 0; i < 4; i++) {
289                 if (inst->DstReg.Index == c->OutputColor[i]) {
290                     pair->RGB.Target = i;
291                     pair->Alpha.Target = i;
292                     pair->RGB.OutputWriteMask |=
293                         inst->DstReg.WriteMask & RC_MASK_XYZ;
294                     pair->Alpha.OutputWriteMask |=
295                         GET_BIT(inst->DstReg.WriteMask, 3);
296                     break;
297                 }
298             }
299         }
300 	} else {
301 		if (needrgb) {
302 			pair->RGB.DestIndex = inst->DstReg.Index;
303 			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
304 		}
305 
306 		if (needalpha) {
307 			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
308 			if (pair->Alpha.WriteMask) {
309 				pair->Alpha.DestIndex = inst->DstReg.Index;
310 			}
311 		}
312 	}
313 
314 	if (needrgb) {
315 		pair->RGB.Omod = inst->Omod;
316 	}
317 	if (needalpha) {
318 		pair->Alpha.Omod = inst->Omod;
319 	}
320 
321 	if (inst->WriteALUResult) {
322 		pair->WriteALUResult = inst->WriteALUResult;
323 		pair->ALUResultCompare = inst->ALUResultCompare;
324 	}
325 }
326 
327 
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)328 static void check_opcode_support(struct r300_fragment_program_compiler *c,
329 				 struct rc_sub_instruction *inst)
330 {
331 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
332 
333 	if (opcode->HasDstReg) {
334 		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
335 			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
336 			return;
337 		}
338 	}
339 
340 	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
341 		if (inst->SrcReg[i].RelAddr) {
342 			rc_error(&c->Base, "Fragment program does not support relative addressing "
343 				 " of source operands.\n");
344 			return;
345 		}
346 	}
347 }
348 
349 
350 /**
351  * Translate all ALU instructions into corresponding pair instructions,
352  * performing no other changes.
353  */
rc_pair_translate(struct radeon_compiler * cc,void * user)354 void rc_pair_translate(struct radeon_compiler *cc, void *user)
355 {
356 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
357 
358 	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
359 	    inst != &c->Base.Program.Instructions;
360 	    inst = inst->Next) {
361 		const struct rc_opcode_info * opcode;
362 		struct rc_sub_instruction copy;
363 
364 		if (inst->Type != RC_INSTRUCTION_NORMAL)
365 			continue;
366 
367 		opcode = rc_get_opcode_info(inst->U.I.Opcode);
368 
369 		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
370 			continue;
371 
372 		copy = inst->U.I;
373 
374 		check_opcode_support(c, &copy);
375 
376 		final_rewrite(&copy);
377 		inst->Type = RC_INSTRUCTION_PAIR;
378 		set_pair_instruction(c, &inst->U.P, &copy);
379 	}
380 }
381