1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 */
38
39 #include "r300_fragprog.h"
40
41 #include "../r300_reg.h"
42
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
45
46
/**
 * Bookkeeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
	/** Compiler whose program is being emitted; also receives errors. */
	struct r300_fragment_program_compiler * compiler;

	/** Index of the node currently being filled (0..3). */
	unsigned current_node : 2;

	/* Values of code->tex.length / code->alu.length captured when the
	 * current node was started. finish_node() reads bits above bit 7 of
	 * these offsets for the r400 extension fields, so they must not be
	 * truncated to 8 bits. */
	unsigned node_first_tex;
	unsigned node_first_alu;

	/** Flags OR'ed into the code_addr word of the current node. */
	uint32_t node_flags;
};
55
/*
 * Declares the locals `c` (the fragment program compiler) and `code` (the
 * r300 code being built). Requires a variable named `emit` in scope.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300

/*
 * Report an error through rc_error(), prefixed with the source file and the
 * name of the calling function. Requires a variable named `c` in scope.
 */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
64
/**
 * Return bits 6..8 of an ALU offset or size, shifted down to bits 0..2.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int upper = bits >> 6;

	return upper & 0x7;
}
69
/**
 * Return the four bits of a TEX offset or size that lie directly above its
 * low \p lsbs bits.
 *
 * The mask is 0xf (decimal 15). A mask of 0x15 is non-contiguous: it would
 * drop bits 1 and 3 of the result and let bit 4 through.
 * NOTE(review): assumes the R400_TEX_*_MSB register fields are 4 bits wide;
 * confirm against r300_reg.h.
 *
 * @param bits The full offset or size value
 * @param lsbs The number of least significant bits
 * @return The next four bits above the low \p lsbs bits, in bits 0..3
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
77
/* Shift x right by lsbs and keep the bits selected by mask. Every argument is
 * parenthesized so that compound expressions are evaluated as a unit. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
79
80 /**
81 * Mark a temporary register as used.
82 */
use_temporary(struct r300_fragment_program_code * code,unsigned int index)83 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
84 {
85 if (index > code->pixsize)
86 code->pixsize = index;
87 }
88
use_source(struct r300_fragment_program_code * code,struct rc_pair_instruction_source src)89 static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
90 {
91 if (!src.Used)
92 return 0;
93
94 if (src.File == RC_FILE_CONSTANT) {
95 return src.Index | (1 << 5);
96 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
97 use_temporary(code, src.Index);
98 return src.Index & 0x1f;
99 }
100
101 return 0;
102 }
103
104
/**
 * Map an rc_opcode to the R300_ALU_OUTC_* opcode of the RGB half.
 *
 * NOP is emitted as MAD. An unrecognized opcode is reported as an error and
 * is also emitted as MAD.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP:
		return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND:
		return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3:
		return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4:
		return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC:
		return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX:
		return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN:
		return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
124
/**
 * Map an rc_opcode to the R300_ALU_OUTA_* opcode of the alpha half.
 *
 * Both DP3 and DP4 map to the alpha DP4 opcode. NOP is emitted as MAD. An
 * unrecognized opcode is reported as an error and is also emitted as MAD.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		/* Name this function in the message, not the RGB translator. */
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		/* fall through */
	case RC_OPCODE_NOP:
		/* fall through */
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
147
148 /**
149 * Emit one paired ALU instruction.
150 */
emit_alu(struct r300_emit_state * emit,struct rc_pair_instruction * inst)151 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
152 {
153 int ip;
154 int j;
155 PROG_CODE;
156
157 if (code->alu.length >= c->Base.max_alu_insts) {
158 error("Too many ALU instructions");
159 return 0;
160 }
161
162 ip = code->alu.length++;
163
164 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
165 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
166
167 for(j = 0; j < 3; ++j) {
168 /* Set the RGB address */
169 unsigned int src = use_source(code, inst->RGB.Src[j]);
170 unsigned int arg;
171 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
172 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
173
174 code->alu.inst[ip].rgb_addr |= src << (6*j);
175
176 /* Set the Alpha address */
177 src = use_source(code, inst->Alpha.Src[j]);
178 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
179 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
180
181 code->alu.inst[ip].alpha_addr |= src << (6*j);
182
183 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
184 arg |= inst->RGB.Arg[j].Abs << 6;
185 arg |= inst->RGB.Arg[j].Negate << 5;
186 code->alu.inst[ip].rgb_inst |= arg << (7*j);
187
188 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
189 arg |= inst->Alpha.Arg[j].Abs << 6;
190 arg |= inst->Alpha.Arg[j].Negate << 5;
191 code->alu.inst[ip].alpha_inst |= arg << (7*j);
192 }
193
194 /* Presubtract */
195 if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
196 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
197 case RC_PRESUB_BIAS:
198 code->alu.inst[ip].rgb_inst |=
199 R300_ALU_SRCP_1_MINUS_2_SRC0;
200 break;
201 case RC_PRESUB_ADD:
202 code->alu.inst[ip].rgb_inst |=
203 R300_ALU_SRCP_SRC1_PLUS_SRC0;
204 break;
205 case RC_PRESUB_SUB:
206 code->alu.inst[ip].rgb_inst |=
207 R300_ALU_SRCP_SRC1_MINUS_SRC0;
208 break;
209 case RC_PRESUB_INV:
210 code->alu.inst[ip].rgb_inst |=
211 R300_ALU_SRCP_1_MINUS_SRC0;
212 break;
213 default:
214 break;
215 }
216 }
217
218 if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
219 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
220 case RC_PRESUB_BIAS:
221 code->alu.inst[ip].alpha_inst |=
222 R300_ALU_SRCP_1_MINUS_2_SRC0;
223 break;
224 case RC_PRESUB_ADD:
225 code->alu.inst[ip].alpha_inst |=
226 R300_ALU_SRCP_SRC1_PLUS_SRC0;
227 break;
228 case RC_PRESUB_SUB:
229 code->alu.inst[ip].alpha_inst |=
230 R300_ALU_SRCP_SRC1_MINUS_SRC0;
231 break;
232 case RC_PRESUB_INV:
233 code->alu.inst[ip].alpha_inst |=
234 R300_ALU_SRCP_1_MINUS_SRC0;
235 break;
236 default:
237 break;
238 }
239 }
240
241 if (inst->RGB.Saturate)
242 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
243 if (inst->Alpha.Saturate)
244 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
245
246 if (inst->RGB.WriteMask) {
247 use_temporary(code, inst->RGB.DestIndex);
248 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
249 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
250 code->alu.inst[ip].rgb_addr |=
251 ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
252 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
253 }
254 if (inst->RGB.OutputWriteMask) {
255 code->alu.inst[ip].rgb_addr |=
256 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
257 R300_RGB_TARGET(inst->RGB.Target);
258 emit->node_flags |= R300_RGBA_OUT;
259 }
260
261 if (inst->Alpha.WriteMask) {
262 use_temporary(code, inst->Alpha.DestIndex);
263 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
264 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
265 code->alu.inst[ip].alpha_addr |=
266 ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
267 R300_ALU_DSTA_REG;
268 }
269 if (inst->Alpha.OutputWriteMask) {
270 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
271 R300_ALPHA_TARGET(inst->Alpha.Target);
272 emit->node_flags |= R300_RGBA_OUT;
273 }
274 if (inst->Alpha.DepthWriteMask) {
275 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
276 emit->node_flags |= R300_W_OUT;
277 c->code->writes_depth = 1;
278 }
279 if (inst->Nop)
280 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
281
282 /* Handle Output Modifier
283 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
284 if (inst->RGB.Omod) {
285 if (inst->RGB.Omod == RC_OMOD_DISABLE) {
286 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
287 }
288 code->alu.inst[ip].rgb_inst |=
289 (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
290 }
291 if (inst->Alpha.Omod) {
292 if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
293 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
294 }
295 code->alu.inst[ip].alpha_inst |=
296 (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
297 }
298 return 1;
299 }
300
301
302 /**
303 * Finish the current node without advancing to the next one.
304 */
finish_node(struct r300_emit_state * emit)305 static int finish_node(struct r300_emit_state * emit)
306 {
307 struct r300_fragment_program_compiler * c = emit->compiler;
308 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
309 unsigned alu_offset;
310 unsigned alu_end;
311 unsigned tex_offset;
312 unsigned tex_end;
313
314 unsigned int alu_offset_msbs, alu_end_msbs;
315
316 if (code->alu.length == emit->node_first_alu) {
317 /* Generate a single NOP for this node */
318 struct rc_pair_instruction inst;
319 memset(&inst, 0, sizeof(inst));
320 if (!emit_alu(emit, &inst))
321 return 0;
322 }
323
324 alu_offset = emit->node_first_alu;
325 alu_end = code->alu.length - alu_offset - 1;
326 tex_offset = emit->node_first_tex;
327 tex_end = code->tex.length - tex_offset - 1;
328
329 if (code->tex.length == emit->node_first_tex) {
330 if (emit->current_node > 0) {
331 error("Node %i has no TEX instructions", emit->current_node);
332 return 0;
333 }
334
335 tex_end = 0;
336 } else {
337 if (emit->current_node == 0)
338 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
339 }
340
341 /* Write the config register.
342 * Note: The order in which the words for each node are written
343 * is not correct here and needs to be fixed up once we're entirely
344 * done
345 *
346 * Also note that the register specification from AMD is slightly
347 * incorrect in its description of this register. */
348 code->code_addr[emit->current_node] =
349 ((alu_offset << R300_ALU_START_SHIFT)
350 & R300_ALU_START_MASK)
351 | ((alu_end << R300_ALU_SIZE_SHIFT)
352 & R300_ALU_SIZE_MASK)
353 | ((tex_offset << R300_TEX_START_SHIFT)
354 & R300_TEX_START_MASK)
355 | ((tex_end << R300_TEX_SIZE_SHIFT)
356 & R300_TEX_SIZE_MASK)
357 | emit->node_flags
358 | (get_msbs_tex(tex_offset, 5)
359 << R400_TEX_START_MSB_SHIFT)
360 | (get_msbs_tex(tex_end, 5)
361 << R400_TEX_SIZE_MSB_SHIFT)
362 ;
363
364 /* Write r400 extended instruction fields. These will be ignored on
365 * r300 cards. */
366 alu_offset_msbs = get_msbs_alu(alu_offset);
367 alu_end_msbs = get_msbs_alu(alu_end);
368 switch(emit->current_node) {
369 case 0:
370 code->r400_code_offset_ext |=
371 alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
372 | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
373 break;
374 case 1:
375 code->r400_code_offset_ext |=
376 alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
377 | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
378 break;
379 case 2:
380 code->r400_code_offset_ext |=
381 alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
382 | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
383 break;
384 case 3:
385 code->r400_code_offset_ext |=
386 alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
387 | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
388 break;
389 }
390 return 1;
391 }
392
393
394 /**
395 * Begin a block of texture instructions.
396 * Create the necessary indirection.
397 */
begin_tex(struct r300_emit_state * emit)398 static int begin_tex(struct r300_emit_state * emit)
399 {
400 PROG_CODE;
401
402 if (code->alu.length == emit->node_first_alu &&
403 code->tex.length == emit->node_first_tex) {
404 return 1;
405 }
406
407 if (emit->current_node == 3) {
408 error("Too many texture indirections");
409 return 0;
410 }
411
412 if (!finish_node(emit))
413 return 0;
414
415 emit->current_node++;
416 emit->node_first_tex = code->tex.length;
417 emit->node_first_alu = code->alu.length;
418 emit->node_flags = 0;
419 return 1;
420 }
421
422
emit_tex(struct r300_emit_state * emit,struct rc_instruction * inst)423 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
424 {
425 unsigned int unit;
426 unsigned int dest;
427 unsigned int opcode;
428 PROG_CODE;
429
430 if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
431 error("Too many TEX instructions");
432 return 0;
433 }
434
435 unit = inst->U.I.TexSrcUnit;
436 dest = inst->U.I.DstReg.Index;
437
438 switch(inst->U.I.Opcode) {
439 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
440 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
441 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
442 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
443 default:
444 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
445 return 0;
446 }
447
448 if (inst->U.I.Opcode == RC_OPCODE_KIL) {
449 unit = 0;
450 dest = 0;
451 } else {
452 use_temporary(code, dest);
453 }
454
455 use_temporary(code, inst->U.I.SrcReg[0].Index);
456
457 code->tex.inst[code->tex.length++] =
458 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
459 & R300_SRC_ADDR_MASK)
460 | ((dest << R300_DST_ADDR_SHIFT)
461 & R300_DST_ADDR_MASK)
462 | (unit << R300_TEX_ID_SHIFT)
463 | (opcode << R300_TEX_INST_SHIFT)
464 | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
465 R400_SRC_ADDR_EXT_BIT : 0)
466 | (dest >= R300_PFS_NUM_TEMP_REGS ?
467 R400_DST_ADDR_EXT_BIT : 0)
468 ;
469 return 1;
470 }
471
472
473 /**
474 * Final compilation step: Turn the intermediate radeon_program into
475 * machine-readable instructions.
476 */
r300BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)477 void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
478 {
479 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
480 struct r300_emit_state emit;
481 struct r300_fragment_program_code *code = &compiler->code->code.r300;
482 unsigned int tex_end;
483
484 memset(&emit, 0, sizeof(emit));
485 emit.compiler = compiler;
486
487 memset(code, 0, sizeof(struct r300_fragment_program_code));
488
489 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
490 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
491 inst = inst->Next) {
492 if (inst->Type == RC_INSTRUCTION_NORMAL) {
493 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
494 begin_tex(&emit);
495 continue;
496 }
497
498 emit_tex(&emit, inst);
499 } else {
500 emit_alu(&emit, &inst->U.P);
501 }
502 }
503
504 if (code->pixsize >= compiler->Base.max_temp_regs)
505 rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
506
507 if (compiler->Base.Error)
508 return;
509
510 /* Finish the program */
511 finish_node(&emit);
512
513 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
514
515 /* Set r400 extended instruction fields. These values will be ignored
516 * on r300 cards. */
517 code->r400_code_offset_ext |=
518 (get_msbs_alu(0)
519 << R400_ALU_OFFSET_MSB_SHIFT)
520 | (get_msbs_alu(code->alu.length - 1)
521 << R400_ALU_SIZE_MSB_SHIFT);
522
523 tex_end = code->tex.length ? code->tex.length - 1 : 0;
524 code->code_offset =
525 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
526 & R300_PFS_CNTL_ALU_OFFSET_MASK)
527 | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
528 & R300_PFS_CNTL_ALU_END_MASK)
529 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
530 & R300_PFS_CNTL_TEX_OFFSET_MASK)
531 | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
532 & R300_PFS_CNTL_TEX_END_MASK)
533 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
534 | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
535 ;
536
537 if (emit.current_node < 3) {
538 int shift = 3 - emit.current_node;
539 int i;
540 for(i = emit.current_node; i >= 0; --i)
541 code->code_addr[shift + i] = code->code_addr[i];
542 for(i = 0; i < shift; ++i)
543 code->code_addr[i] = 0;
544 }
545
546 if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
547 || code->alu.length > R300_PFS_MAX_ALU_INST
548 || code->tex.length > R300_PFS_MAX_TEX_INST) {
549
550 code->r390_mode = 1;
551 }
552 }
553