1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
r600_pipe_shader(struct pipe_context * ctx,struct r600_pipe_shader * shader)60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
/* Forward declaration: the TGSI translator is called from
 * r600_pipe_shader_create() below, ahead of its definition. */
106 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
107
r600_pipe_shader_create(struct pipe_context * ctx,struct r600_pipe_shader * shader)108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_context *rctx = (struct r600_context *)ctx;
112 struct r600_pipe_shader_selector *sel = shader->selector;
113 int r;
114
115 /* Would like some magic "get_bool_option_once" routine.
116 */
117 if (dump_shaders == -1)
118 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
119
120 if (dump_shaders) {
121 fprintf(stderr, "--------------------------------------------------------------\n");
122 tgsi_dump(sel->tokens, 0);
123
124 if (sel->so.num_outputs) {
125 unsigned i;
126 fprintf(stderr, "STREAMOUT\n");
127 for (i = 0; i < sel->so.num_outputs; i++) {
128 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
129 sel->so.output[i].start_component;
130 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
131 sel->so.output[i].output_buffer, sel->so.output[i].register_index,
132 mask & 1 ? "x" : "_",
133 (mask >> 1) & 1 ? "y" : "_",
134 (mask >> 2) & 1 ? "z" : "_",
135 (mask >> 3) & 1 ? "w" : "_");
136 }
137 }
138 }
139 r = r600_shader_from_tgsi(rctx, shader);
140 if (r) {
141 R600_ERR("translation from TGSI failed !\n");
142 return r;
143 }
144 r = r600_bytecode_build(&shader->shader.bc);
145 if (r) {
146 R600_ERR("building bytecode failed !\n");
147 return r;
148 }
149 if (dump_shaders) {
150 r600_bytecode_dump(&shader->shader.bc);
151 fprintf(stderr, "______________________________________________________________\n");
152 }
153 return r600_pipe_shader(ctx, shader);
154 }
155
r600_pipe_shader_destroy(struct pipe_context * ctx,struct r600_pipe_shader * shader)156 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
157 {
158 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
159 r600_bytecode_clear(&shader->shader.bc);
160 }
161
162 /*
163 * tgsi -> r600 shader
164 */
/* Forward declaration: struct r600_shader_ctx stores a pointer to this type
 * before the type itself is defined. */
165 struct r600_shader_tgsi_instruction;
166
/* A TGSI source operand translated to r600 terms (filled in by tgsi_src()). */
struct r600_shader_src {
	/* Source select: register index plus file offset, or a special/literal select. */
	unsigned sel;
	/* Per-channel swizzle (X, Y, Z, W). */
	unsigned swizzle[4];
	/* Negate modifier. */
	unsigned neg;
	/* Absolute-value modifier. */
	unsigned abs;
	/* Set to V_SQ_REL_RELATIVE for indirectly addressed registers. */
	unsigned rel;
	/* The four literal dwords when sel is V_SQ_ALU_SRC_LITERAL. */
	uint32_t value[4];
};
175
176 struct r600_shader_ctx {
177 struct tgsi_shader_info info;
178 struct tgsi_parse_context parse;
179 const struct tgsi_token *tokens;
180 unsigned type;
181 unsigned file_offset[TGSI_FILE_COUNT];
182 unsigned temp_reg;
183 struct r600_shader_tgsi_instruction *inst_info;
184 struct r600_bytecode *bc;
185 struct r600_shader *shader;
186 struct r600_shader_src src[4];
187 uint32_t *literals;
188 uint32_t nliterals;
189 uint32_t max_driver_temp_used;
190 /* needed for evergreen interpolation */
191 boolean input_centroid;
192 boolean input_linear;
193 boolean input_perspective;
194 int num_interp_gpr;
195 int face_gpr;
196 int colors_used;
197 boolean clip_vertex_write;
198 unsigned cv_output;
199 int fragcoord_input;
200 int native_integers;
201 };
202
/* One entry of a per-chip TGSI instruction table (indexed by TGSI opcode). */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry belongs to. */
	unsigned tgsi_opcode;
	/* Non-zero for three-operand (OP3) instructions. */
	unsigned is_op3;
	/* Hardware opcode. */
	unsigned r600_opcode;
	/* Handler invoked with the shader context; returns an int status. */
	int (*process)(struct r600_shader_ctx *ctx);
};
209
/* Per-chip instruction tables (R600/R700, Evergreen, Cayman); they are
 * indexed by TGSI opcode in the byte-stream flow-control helpers below. */
210 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
/* Helpers declared ahead of their definitions; the flow-control ones are
 * reused by the byte-stream decoder further down. */
211 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
212 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
213 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
214 static int tgsi_else(struct r600_shader_ctx *ctx);
215 static int tgsi_endif(struct r600_shader_ctx *ctx);
216 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
217 static int tgsi_endloop(struct r600_shader_ctx *ctx);
218 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
220 /*
221 * bytestream -> r600 shader
222 *
223 * These functions are used to transform the output of the LLVM backend into
224 * struct r600_bytecode.
225 */
226
/* Forward declaration: r600_compute_shader_create() calls the decoder before
 * it is defined. */
227 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
228 unsigned char * bytes, unsigned num_bytes);
229
230 #ifdef HAVE_OPENCL
/*
 * Compile an LLVM module into r600 bytecode for a compute shader.
 *
 * The module is compiled to a byte stream, the stream is decoded into
 * @bytecode, a CF_END is appended on Cayman and the bytecode is built.
 * With R600_DUMP_SHADERS set, the result is dumped.
 *
 * Always returns 1; the results of the compile and build steps are not
 * checked.
 */
int r600_compute_shader_create(struct pipe_context * ctx,
	LLVMModuleRef mod,  struct r600_bytecode * bytecode)
{
	struct r600_context *r600_ctx = (struct r600_context *)ctx;
	/* Start with an empty stream so nothing is decoded if the compiler
	 * leaves its outputs untouched. */
	unsigned char * bytes = NULL;
	unsigned byte_count = 0;
	struct r600_shader_ctx shader_ctx;
	unsigned dump = 0;

	/* The decoders read more than .bc (e.g. .shader), so no field may be
	 * left indeterminate.
	 * NOTE(review): .shader stays NULL here; a KILLGT instruction in the
	 * stream would dereference it - confirm compute code never emits one. */
	memset(&shader_ctx, 0, sizeof(shader_ctx));

	if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
		dump = 1;
	}

	r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
	shader_ctx.bc = bytecode;
	r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
	shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
	r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
	if (shader_ctx.bc->chip_class == CAYMAN) {
		cm_bytecode_add_cf_end(shader_ctx.bc);
	}
	r600_bytecode_build(shader_ctx.bc);
	if (dump) {
		r600_bytecode_dump(shader_ctx.bc);
	}
	/* NOTE(review): ownership of 'bytes' is not visible here; if
	 * r600_llvm_compile() allocates it, it is never released. */
	return 1;
}
258
259 #endif /* HAVE_OPENCL */
260
/*
 * Read a little-endian 32-bit value from a byte stream.
 *
 * @bytes       start of the stream
 * @bytes_read  in/out cursor into @bytes; advanced by four
 *
 * Returns the decoded value.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
					unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;
	for (i = 0; i < 4; i++) {
		/* Widen before shifting: an unsigned char promotes to int, and
		 * shifting a byte >= 0x80 left by 24 overflows a signed int. */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
271
r600_src_from_byte_stream(unsigned char * bytes,unsigned bytes_read,struct r600_bytecode_alu * alu,unsigned src_idx)272 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
273 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
274 {
275 unsigned i;
276 unsigned sel0, sel1;
277 sel0 = bytes[bytes_read++];
278 sel1 = bytes[bytes_read++];
279 alu->src[src_idx].sel = sel0 | (sel1 << 8);
280 alu->src[src_idx].chan = bytes[bytes_read++];
281 alu->src[src_idx].neg = bytes[bytes_read++];
282 alu->src[src_idx].abs = bytes[bytes_read++];
283 alu->src[src_idx].rel = bytes[bytes_read++];
284 alu->src[src_idx].kc_bank = bytes[bytes_read++];
285 for (i = 0; i < 4; i++) {
286 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
287 }
288 return bytes_read;
289 }
290
r600_alu_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)291 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
292 unsigned char * bytes, unsigned bytes_read)
293 {
294 unsigned src_idx;
295 unsigned inst0, inst1;
296 unsigned push_modifier;
297 struct r600_bytecode_alu alu;
298 memset(&alu, 0, sizeof(alu));
299 for(src_idx = 0; src_idx < 3; src_idx++) {
300 bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
301 &alu, src_idx);
302 }
303
304 alu.dst.sel = bytes[bytes_read++];
305 alu.dst.chan = bytes[bytes_read++];
306 alu.dst.clamp = bytes[bytes_read++];
307 alu.dst.write = bytes[bytes_read++];
308 alu.dst.rel = bytes[bytes_read++];
309 inst0 = bytes[bytes_read++];
310 inst1 = bytes[bytes_read++];
311 alu.inst = inst0 | (inst1 << 8);
312 alu.last = bytes[bytes_read++];
313 alu.is_op3 = bytes[bytes_read++];
314 push_modifier = bytes[bytes_read++];
315 alu.pred_sel = bytes[bytes_read++];
316 alu.bank_swizzle = bytes[bytes_read++];
317 alu.bank_swizzle_force = bytes[bytes_read++];
318 alu.omod = bytes[bytes_read++];
319 alu.index_mode = bytes[bytes_read++];
320
321
322 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
323 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
324 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
325 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
326 alu.update_pred = 1;
327 alu.dst.write = 0;
328 alu.src[1].sel = V_SQ_ALU_SRC_0;
329 alu.src[1].chan = 0;
330 alu.last = 1;
331 }
332
333 if (push_modifier) {
334 alu.pred_sel = 0;
335 alu.execute_mask = 1;
336 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
337 } else
338 r600_bytecode_add_alu(ctx->bc, &alu);
339
340
341 /* XXX: Handle other KILL instructions */
342 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
343 ctx->shader->uses_kill = 1;
344 /* XXX: This should be enforced in the LLVM backend. */
345 ctx->bc->force_add_cf = 1;
346 }
347 return bytes_read;
348 }
349
llvm_if(struct r600_shader_ctx * ctx,struct r600_bytecode_alu * alu,unsigned pred_inst)350 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
351 unsigned pred_inst)
352 {
353 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
354 fc_pushlevel(ctx, FC_IF);
355 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
356 }
357
r600_break_from_byte_stream(struct r600_shader_ctx * ctx,struct r600_bytecode_alu * alu,unsigned compare_opcode)358 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
359 struct r600_bytecode_alu *alu, unsigned compare_opcode)
360 {
361 unsigned opcode = TGSI_OPCODE_BRK;
362 if (ctx->bc->chip_class == CAYMAN)
363 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
364 else if (ctx->bc->chip_class >= EVERGREEN)
365 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
366 else
367 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
368 llvm_if(ctx, alu, compare_opcode);
369 tgsi_loop_brk_cont(ctx);
370 tgsi_endif(ctx);
371 }
372
r600_fc_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)373 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
374 unsigned char * bytes, unsigned bytes_read)
375 {
376 struct r600_bytecode_alu alu;
377 unsigned inst;
378 memset(&alu, 0, sizeof(alu));
379 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
380 inst = bytes[bytes_read++];
381 switch (inst) {
382 case 0: /* FC_IF */
383 llvm_if(ctx, &alu,
384 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
385 break;
386 case 1: /* FC_IF_INT */
387 llvm_if(ctx, &alu,
388 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
389 break;
390 case 2: /* FC_ELSE */
391 tgsi_else(ctx);
392 break;
393 case 3: /* FC_ENDIF */
394 tgsi_endif(ctx);
395 break;
396 case 4: /* FC_BGNLOOP */
397 tgsi_bgnloop(ctx);
398 break;
399 case 5: /* FC_ENDLOOP */
400 tgsi_endloop(ctx);
401 break;
402 case 6: /* FC_BREAK */
403 r600_break_from_byte_stream(ctx, &alu,
404 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
405 break;
406 case 7: /* FC_BREAK_NZ_INT */
407 r600_break_from_byte_stream(ctx, &alu,
408 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
409 break;
410 case 8: /* FC_CONTINUE */
411 {
412 unsigned opcode = TGSI_OPCODE_CONT;
413 if (ctx->bc->chip_class == CAYMAN) {
414 ctx->inst_info =
415 &cm_shader_tgsi_instruction[opcode];
416 } else if (ctx->bc->chip_class >= EVERGREEN) {
417 ctx->inst_info =
418 &eg_shader_tgsi_instruction[opcode];
419 } else {
420 ctx->inst_info =
421 &r600_shader_tgsi_instruction[opcode];
422 }
423 tgsi_loop_brk_cont(ctx);
424 }
425 break;
426 case 9: /* FC_BREAK_Z_INT */
427 r600_break_from_byte_stream(ctx, &alu,
428 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
429 break;
430 case 10: /* FC_BREAK_NZ */
431 r600_break_from_byte_stream(ctx, &alu,
432 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
433 break;
434 }
435
436 return bytes_read;
437 }
438
r600_tex_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)439 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
440 unsigned char * bytes, unsigned bytes_read)
441 {
442 struct r600_bytecode_tex tex;
443
444 tex.inst = bytes[bytes_read++];
445 tex.resource_id = bytes[bytes_read++];
446 tex.src_gpr = bytes[bytes_read++];
447 tex.src_rel = bytes[bytes_read++];
448 tex.dst_gpr = bytes[bytes_read++];
449 tex.dst_rel = bytes[bytes_read++];
450 tex.dst_sel_x = bytes[bytes_read++];
451 tex.dst_sel_y = bytes[bytes_read++];
452 tex.dst_sel_z = bytes[bytes_read++];
453 tex.dst_sel_w = bytes[bytes_read++];
454 tex.lod_bias = bytes[bytes_read++];
455 tex.coord_type_x = bytes[bytes_read++];
456 tex.coord_type_y = bytes[bytes_read++];
457 tex.coord_type_z = bytes[bytes_read++];
458 tex.coord_type_w = bytes[bytes_read++];
459 tex.offset_x = bytes[bytes_read++];
460 tex.offset_y = bytes[bytes_read++];
461 tex.offset_z = bytes[bytes_read++];
462 tex.sampler_id = bytes[bytes_read++];
463 tex.src_sel_x = bytes[bytes_read++];
464 tex.src_sel_y = bytes[bytes_read++];
465 tex.src_sel_z = bytes[bytes_read++];
466 tex.src_sel_w = bytes[bytes_read++];
467
468 r600_bytecode_add_tex(ctx->bc, &tex);
469
470 return bytes_read;
471 }
472
r600_vtx_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)473 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
474 unsigned char * bytes, unsigned bytes_read)
475 {
476 struct r600_bytecode_vtx vtx;
477
478 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
479 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
480 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
481
482 memset(&vtx, 0, sizeof(vtx));
483
484 /* WORD0 */
485 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
486 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
487 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
488 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
489 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
490 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
491
492 /* WORD1 */
493 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
494 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
495 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
496 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
497 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
498 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
499 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
500 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
501 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
502 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
503
504 /* WORD 2*/
505 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
506 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
507
508 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
509 fprintf(stderr, "Error adding vtx\n");
510 }
511 /* Use the Texture Cache */
512 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
513 return bytes_read;
514 }
515
r600_bytecode_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned num_bytes)516 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
517 unsigned char * bytes, unsigned num_bytes)
518 {
519 unsigned bytes_read = 0;
520 unsigned i, byte;
521 while (bytes_read < num_bytes) {
522 char inst_type = bytes[bytes_read++];
523 switch (inst_type) {
524 case 0:
525 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
526 bytes_read);
527 break;
528 case 1:
529 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
530 bytes_read);
531 break;
532 case 2:
533 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
534 bytes_read);
535 break;
536 case 3:
537 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
538 for (i = 0; i < 2; i++) {
539 for (byte = 0 ; byte < 4; byte++) {
540 ctx->bc->cf_last->isa[i] |=
541 (bytes[bytes_read++] << (byte * 8));
542 }
543 }
544 break;
545
546 case 4:
547 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
548 bytes_read);
549 break;
550 default:
551 /* XXX: Error here */
552 break;
553 }
554 }
555 }
556
557 /* End bytestream -> r600 shader functions*/
558
tgsi_is_supported(struct r600_shader_ctx * ctx)559 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
560 {
561 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
562 int j;
563
564 if (i->Instruction.NumDstRegs > 1) {
565 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
566 return -EINVAL;
567 }
568 if (i->Instruction.Predicate) {
569 R600_ERR("predicate unsupported\n");
570 return -EINVAL;
571 }
572 #if 0
573 if (i->Instruction.Label) {
574 R600_ERR("label unsupported\n");
575 return -EINVAL;
576 }
577 #endif
578 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
579 if (i->Src[j].Register.Dimension) {
580 R600_ERR("unsupported src %d (dimension %d)\n", j,
581 i->Src[j].Register.Dimension);
582 return -EINVAL;
583 }
584 }
585 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
586 if (i->Dst[j].Register.Dimension) {
587 R600_ERR("unsupported dst (dimension)\n");
588 return -EINVAL;
589 }
590 }
591 return 0;
592 }
593
evergreen_interp_alu(struct r600_shader_ctx * ctx,int input)594 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
595 {
596 int i, r;
597 struct r600_bytecode_alu alu;
598 int gpr = 0, base_chan = 0;
599 int ij_index = 0;
600
601 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
602 ij_index = 0;
603 if (ctx->shader->input[input].centroid)
604 ij_index++;
605 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
606 ij_index = 0;
607 /* if we have perspective add one */
608 if (ctx->input_perspective) {
609 ij_index++;
610 /* if we have perspective centroid */
611 if (ctx->input_centroid)
612 ij_index++;
613 }
614 if (ctx->shader->input[input].centroid)
615 ij_index++;
616 }
617
618 /* work out gpr and base_chan from index */
619 gpr = ij_index / 2;
620 base_chan = (2 * (ij_index % 2)) + 1;
621
622 for (i = 0; i < 8; i++) {
623 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
624
625 if (i < 4)
626 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
627 else
628 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
629
630 if ((i > 1) && (i < 6)) {
631 alu.dst.sel = ctx->shader->input[input].gpr;
632 alu.dst.write = 1;
633 }
634
635 alu.dst.chan = i % 4;
636
637 alu.src[0].sel = gpr;
638 alu.src[0].chan = (base_chan - (i % 2));
639
640 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
641
642 alu.bank_swizzle_force = SQ_ALU_VEC_210;
643 if ((i % 4) == 3)
644 alu.last = 1;
645 r = r600_bytecode_add_alu(ctx->bc, &alu);
646 if (r)
647 return r;
648 }
649 return 0;
650 }
651
evergreen_interp_flat(struct r600_shader_ctx * ctx,int input)652 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
653 {
654 int i, r;
655 struct r600_bytecode_alu alu;
656
657 for (i = 0; i < 4; i++) {
658 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
659
660 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
661
662 alu.dst.sel = ctx->shader->input[input].gpr;
663 alu.dst.write = 1;
664
665 alu.dst.chan = i;
666
667 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
668 alu.src[0].chan = i;
669
670 if (i == 3)
671 alu.last = 1;
672 r = r600_bytecode_add_alu(ctx->bc, &alu);
673 if (r)
674 return r;
675 }
676 return 0;
677 }
678
679 /*
680 * Special export handling in shaders
681 *
682 * shader export ARRAY_BASE for EXPORT_POS:
683 * 60 is position
684 * 61 is misc vector
685 * 62, 63 are clip distance vectors
686 *
687 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
688 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
689 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
690 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
691 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
692 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
693 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
694 * exclusive from render target index)
695 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
696 *
697 *
698 * shader export ARRAY_BASE for EXPORT_PIXEL:
699 * 0-7 CB targets
700 * 61 computed Z vector
701 *
702 * The use of the values exported in the computed Z vector are controlled
703 * by DB_SHADER_CONTROL:
704 * Z_EXPORT_ENABLE - Z as a float in RED
705 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
706 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
707 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
708 * DB_SOURCE_FORMAT - export control restrictions
709 *
710 */
711
712
713 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
r600_spi_sid(struct r600_shader_io * io)714 static int r600_spi_sid(struct r600_shader_io * io)
715 {
716 int index, name = io->name;
717
718 /* These params are handled differently, they don't need
719 * semantic indices, so we'll use 0 for them.
720 */
721 if (name == TGSI_SEMANTIC_POSITION ||
722 name == TGSI_SEMANTIC_PSIZE ||
723 name == TGSI_SEMANTIC_FACE)
724 index = 0;
725 else {
726 if (name == TGSI_SEMANTIC_GENERIC) {
727 /* For generic params simply use sid from tgsi */
728 index = io->sid;
729 } else {
730 /* For non-generic params - pack name and sid into 8 bits */
731 index = 0x80 | (name<<3) | (io->sid);
732 }
733
734 /* Make sure that all really used indices have nonzero value, so
735 * we can just compare it to 0 later instead of comparing the name
736 * with different values to detect special cases. */
737 index++;
738 }
739
740 return index;
741 };
742
743 /* turn input into interpolate on EG */
evergreen_interp_input(struct r600_shader_ctx * ctx,int index)744 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
745 {
746 int r = 0;
747
748 if (ctx->shader->input[index].spi_sid) {
749 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
750 if (ctx->shader->input[index].interpolate > 0) {
751 r = evergreen_interp_alu(ctx, index);
752 } else {
753 r = evergreen_interp_flat(ctx, index);
754 }
755 }
756 return r;
757 }
758
select_twoside_color(struct r600_shader_ctx * ctx,int front,int back)759 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
760 {
761 struct r600_bytecode_alu alu;
762 int i, r;
763 int gpr_front = ctx->shader->input[front].gpr;
764 int gpr_back = ctx->shader->input[back].gpr;
765
766 for (i = 0; i < 4; i++) {
767 memset(&alu, 0, sizeof(alu));
768 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
769 alu.is_op3 = 1;
770 alu.dst.write = 1;
771 alu.dst.sel = gpr_front;
772 alu.src[0].sel = ctx->face_gpr;
773 alu.src[1].sel = gpr_front;
774 alu.src[2].sel = gpr_back;
775
776 alu.dst.chan = i;
777 alu.src[1].chan = i;
778 alu.src[2].chan = i;
779 alu.last = (i==3);
780
781 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
782 return r;
783 }
784
785 return 0;
786 }
787
tgsi_declaration(struct r600_shader_ctx * ctx)788 static int tgsi_declaration(struct r600_shader_ctx *ctx)
789 {
790 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
791 unsigned i;
792 int r;
793
794 switch (d->Declaration.File) {
795 case TGSI_FILE_INPUT:
796 i = ctx->shader->ninput++;
797 ctx->shader->input[i].name = d->Semantic.Name;
798 ctx->shader->input[i].sid = d->Semantic.Index;
799 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
800 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
801 ctx->shader->input[i].centroid = d->Interp.Centroid;
802 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
803 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
804 switch (ctx->shader->input[i].name) {
805 case TGSI_SEMANTIC_FACE:
806 ctx->face_gpr = ctx->shader->input[i].gpr;
807 break;
808 case TGSI_SEMANTIC_COLOR:
809 ctx->colors_used++;
810 break;
811 case TGSI_SEMANTIC_POSITION:
812 ctx->fragcoord_input = i;
813 break;
814 }
815 if (ctx->bc->chip_class >= EVERGREEN) {
816 if ((r = evergreen_interp_input(ctx, i)))
817 return r;
818 }
819 }
820 break;
821 case TGSI_FILE_OUTPUT:
822 i = ctx->shader->noutput++;
823 ctx->shader->output[i].name = d->Semantic.Name;
824 ctx->shader->output[i].sid = d->Semantic.Index;
825 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
826 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
827 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
828 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
829 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
830 switch (d->Semantic.Name) {
831 case TGSI_SEMANTIC_CLIPDIST:
832 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
833 break;
834 case TGSI_SEMANTIC_PSIZE:
835 ctx->shader->vs_out_misc_write = 1;
836 ctx->shader->vs_out_point_size = 1;
837 break;
838 case TGSI_SEMANTIC_CLIPVERTEX:
839 ctx->clip_vertex_write = TRUE;
840 ctx->cv_output = i;
841 break;
842 }
843 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
844 switch (d->Semantic.Name) {
845 case TGSI_SEMANTIC_COLOR:
846 ctx->shader->nr_ps_max_color_exports++;
847 break;
848 }
849 }
850 break;
851 case TGSI_FILE_CONSTANT:
852 case TGSI_FILE_TEMPORARY:
853 case TGSI_FILE_SAMPLER:
854 case TGSI_FILE_ADDRESS:
855 break;
856
857 case TGSI_FILE_SYSTEM_VALUE:
858 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
859 if (!ctx->native_integers) {
860 struct r600_bytecode_alu alu;
861 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
862
863 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
864 alu.src[0].sel = 0;
865 alu.src[0].chan = 3;
866
867 alu.dst.sel = 0;
868 alu.dst.chan = 3;
869 alu.dst.write = 1;
870 alu.last = 1;
871
872 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
873 return r;
874 }
875 break;
876 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
877 break;
878 default:
879 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
880 return -EINVAL;
881 }
882 return 0;
883 }
884
r600_get_temp(struct r600_shader_ctx * ctx)885 static int r600_get_temp(struct r600_shader_ctx *ctx)
886 {
887 return ctx->temp_reg + ctx->max_driver_temp_used++;
888 }
889
890 /*
891 * for evergreen we need to scan the shader to find the number of GPRs we need to
892 * reserve for interpolation.
893 *
894 * we need to know if we are going to emit
895 * any centroid inputs
896 * if perspective and linear are required
897 */
evergreen_gpr_count(struct r600_shader_ctx * ctx)898 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
899 {
900 int i;
901 int num_baryc;
902
903 ctx->input_linear = FALSE;
904 ctx->input_perspective = FALSE;
905 ctx->input_centroid = FALSE;
906 ctx->num_interp_gpr = 1;
907
908 /* any centroid inputs */
909 for (i = 0; i < ctx->info.num_inputs; i++) {
910 /* skip position/face */
911 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
912 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
913 continue;
914 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
915 ctx->input_linear = TRUE;
916 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
917 ctx->input_perspective = TRUE;
918 if (ctx->info.input_centroid[i])
919 ctx->input_centroid = TRUE;
920 }
921
922 num_baryc = 0;
923 /* ignoring sample for now */
924 if (ctx->input_perspective)
925 num_baryc++;
926 if (ctx->input_linear)
927 num_baryc++;
928 if (ctx->input_centroid)
929 num_baryc *= 2;
930
931 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
932
933 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
934 return ctx->num_interp_gpr;
935 }
936
tgsi_src(struct r600_shader_ctx * ctx,const struct tgsi_full_src_register * tgsi_src,struct r600_shader_src * r600_src)937 static void tgsi_src(struct r600_shader_ctx *ctx,
938 const struct tgsi_full_src_register *tgsi_src,
939 struct r600_shader_src *r600_src)
940 {
941 memset(r600_src, 0, sizeof(*r600_src));
942 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
943 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
944 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
945 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
946 r600_src->neg = tgsi_src->Register.Negate;
947 r600_src->abs = tgsi_src->Register.Absolute;
948
949 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
950 int index;
951 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
952 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
953 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
954
955 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
956 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
957 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
958 return;
959 }
960 index = tgsi_src->Register.Index;
961 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
962 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
963 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
964 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
965 r600_src->swizzle[0] = 3;
966 r600_src->swizzle[1] = 3;
967 r600_src->swizzle[2] = 3;
968 r600_src->swizzle[3] = 3;
969 r600_src->sel = 0;
970 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
971 r600_src->swizzle[0] = 0;
972 r600_src->swizzle[1] = 0;
973 r600_src->swizzle[2] = 0;
974 r600_src->swizzle[3] = 0;
975 r600_src->sel = 0;
976 }
977 } else {
978 if (tgsi_src->Register.Indirect)
979 r600_src->rel = V_SQ_REL_RELATIVE;
980 r600_src->sel = tgsi_src->Register.Index;
981 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
982 }
983 }
984
tgsi_fetch_rel_const(struct r600_shader_ctx * ctx,unsigned int offset,unsigned int dst_reg)985 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
986 {
987 struct r600_bytecode_vtx vtx;
988 unsigned int ar_reg;
989 int r;
990
991 if (offset) {
992 struct r600_bytecode_alu alu;
993
994 memset(&alu, 0, sizeof(alu));
995
996 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
997 alu.src[0].sel = ctx->bc->ar_reg;
998
999 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1000 alu.src[1].value = offset;
1001
1002 alu.dst.sel = dst_reg;
1003 alu.dst.write = 1;
1004 alu.last = 1;
1005
1006 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1007 return r;
1008
1009 ar_reg = dst_reg;
1010 } else {
1011 ar_reg = ctx->bc->ar_reg;
1012 }
1013
1014 memset(&vtx, 0, sizeof(vtx));
1015 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1016 vtx.src_gpr = ar_reg;
1017 vtx.mega_fetch_count = 16;
1018 vtx.dst_gpr = dst_reg;
1019 vtx.dst_sel_x = 0; /* SEL_X */
1020 vtx.dst_sel_y = 1; /* SEL_Y */
1021 vtx.dst_sel_z = 2; /* SEL_Z */
1022 vtx.dst_sel_w = 3; /* SEL_W */
1023 vtx.data_format = FMT_32_32_32_32_FLOAT;
1024 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1025 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1026 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1027 vtx.endian = r600_endian_swap(32);
1028
1029 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1030 return r;
1031
1032 return 0;
1033 }
1034
tgsi_split_constant(struct r600_shader_ctx * ctx)1035 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1036 {
1037 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1038 struct r600_bytecode_alu alu;
1039 int i, j, k, nconst, r;
1040
1041 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1042 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1043 nconst++;
1044 }
1045 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1046 }
1047 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1048 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1049 continue;
1050 }
1051
1052 if (ctx->src[i].rel) {
1053 int treg = r600_get_temp(ctx);
1054 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1055 return r;
1056
1057 ctx->src[i].sel = treg;
1058 ctx->src[i].rel = 0;
1059 j--;
1060 } else if (j > 0) {
1061 int treg = r600_get_temp(ctx);
1062 for (k = 0; k < 4; k++) {
1063 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1065 alu.src[0].sel = ctx->src[i].sel;
1066 alu.src[0].chan = k;
1067 alu.src[0].rel = ctx->src[i].rel;
1068 alu.dst.sel = treg;
1069 alu.dst.chan = k;
1070 alu.dst.write = 1;
1071 if (k == 3)
1072 alu.last = 1;
1073 r = r600_bytecode_add_alu(ctx->bc, &alu);
1074 if (r)
1075 return r;
1076 }
1077 ctx->src[i].sel = treg;
1078 ctx->src[i].rel =0;
1079 j--;
1080 }
1081 }
1082 return 0;
1083 }
1084
1085 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
tgsi_split_literal_constant(struct r600_shader_ctx * ctx)1086 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1087 {
1088 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1089 struct r600_bytecode_alu alu;
1090 int i, j, k, nliteral, r;
1091
1092 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1093 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1094 nliteral++;
1095 }
1096 }
1097 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1098 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1099 int treg = r600_get_temp(ctx);
1100 for (k = 0; k < 4; k++) {
1101 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1103 alu.src[0].sel = ctx->src[i].sel;
1104 alu.src[0].chan = k;
1105 alu.src[0].value = ctx->src[i].value[k];
1106 alu.dst.sel = treg;
1107 alu.dst.chan = k;
1108 alu.dst.write = 1;
1109 if (k == 3)
1110 alu.last = 1;
1111 r = r600_bytecode_add_alu(ctx->bc, &alu);
1112 if (r)
1113 return r;
1114 }
1115 ctx->src[i].sel = treg;
1116 j--;
1117 }
1118 }
1119 return 0;
1120 }
1121
process_twoside_color_inputs(struct r600_shader_ctx * ctx)1122 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1123 {
1124 int i, r, count = ctx->shader->ninput;
1125
1126 /* additional inputs will be allocated right after the existing inputs,
1127 * we won't need them after the color selection, so we don't need to
1128 * reserve these gprs for the rest of the shader code and to adjust
1129 * output offsets etc. */
1130 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1131 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1132
1133 if (ctx->face_gpr == -1) {
1134 i = ctx->shader->ninput++;
1135 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1136 ctx->shader->input[i].spi_sid = 0;
1137 ctx->shader->input[i].gpr = gpr++;
1138 ctx->face_gpr = ctx->shader->input[i].gpr;
1139 }
1140
1141 for (i = 0; i < count; i++) {
1142 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1143 int ni = ctx->shader->ninput++;
1144 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1145 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1146 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1147 ctx->shader->input[ni].gpr = gpr++;
1148
1149 if (ctx->bc->chip_class >= EVERGREEN) {
1150 r = evergreen_interp_input(ctx, ni);
1151 if (r)
1152 return r;
1153 }
1154
1155 r = select_twoside_color(ctx, i, ni);
1156 if (r)
1157 return r;
1158 }
1159 }
1160 return 0;
1161 }
1162
r600_shader_from_tgsi(struct r600_context * rctx,struct r600_pipe_shader * pipeshader)1163 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
1164 {
1165 struct r600_shader *shader = &pipeshader->shader;
1166 struct tgsi_token *tokens = pipeshader->selector->tokens;
1167 struct pipe_stream_output_info so = pipeshader->selector->so;
1168 struct tgsi_full_immediate *immediate;
1169 struct tgsi_full_property *property;
1170 struct r600_shader_ctx ctx;
1171 struct r600_bytecode_output output[32];
1172 unsigned output_done, noutput;
1173 unsigned opcode;
1174 int i, j, k, r = 0;
1175 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1176 /* Declarations used by llvm code */
1177 bool use_llvm = false;
1178 unsigned char * inst_bytes = NULL;
1179 unsigned inst_byte_count = 0;
1180
1181 #ifdef R600_USE_LLVM
1182 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1183 #endif
1184 ctx.bc = &shader->bc;
1185 ctx.shader = shader;
1186 ctx.native_integers = true;
1187
1188 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
1189 ctx.tokens = tokens;
1190 tgsi_scan_shader(tokens, &ctx.info);
1191 tgsi_parse_init(&ctx.parse, tokens);
1192 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1193 shader->processor_type = ctx.type;
1194 ctx.bc->type = shader->processor_type;
1195
1196 ctx.face_gpr = -1;
1197 ctx.fragcoord_input = -1;
1198 ctx.colors_used = 0;
1199 ctx.clip_vertex_write = 0;
1200
1201 shader->nr_ps_color_exports = 0;
1202 shader->nr_ps_max_color_exports = 0;
1203
1204 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
1205
1206 /* register allocations */
1207 /* Values [0,127] correspond to GPR[0..127].
1208 * Values [128,159] correspond to constant buffer bank 0
1209 * Values [160,191] correspond to constant buffer bank 1
1210 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1211 * Values [256,287] correspond to constant buffer bank 2 (EG)
1212 * Values [288,319] correspond to constant buffer bank 3 (EG)
1213 * Other special values are shown in the list below.
1214 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1215 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1216 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1217 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1218 * 248 SQ_ALU_SRC_0: special constant 0.0.
1219 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1220 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1221 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1222 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1223 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1224 * 254 SQ_ALU_SRC_PV: previous vector result.
1225 * 255 SQ_ALU_SRC_PS: previous scalar result.
1226 */
1227 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1228 ctx.file_offset[i] = 0;
1229 }
1230 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1231 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1232 if (ctx.bc->chip_class >= EVERGREEN) {
1233 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1234 } else {
1235 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1236 }
1237 }
1238 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1239 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1240 }
1241
1242 /* LLVM backend setup */
1243 #ifdef R600_USE_LLVM
1244 if (use_llvm && ctx.info.indirect_files) {
1245 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1246 "indirect adressing. Falling back to TGSI "
1247 "backend.\n");
1248 use_llvm = 0;
1249 }
1250 if (use_llvm) {
1251 struct radeon_llvm_context radeon_llvm_ctx;
1252 LLVMModuleRef mod;
1253 unsigned dump = 0;
1254 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1255 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1256 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1257 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1258 dump = 1;
1259 }
1260 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1261 rctx->family, dump)) {
1262 FREE(inst_bytes);
1263 radeon_llvm_dispose(&radeon_llvm_ctx);
1264 use_llvm = 0;
1265 fprintf(stderr, "R600 LLVM backend failed to compile "
1266 "shader. Falling back to TGSI\n");
1267 } else {
1268 ctx.file_offset[TGSI_FILE_OUTPUT] =
1269 ctx.file_offset[TGSI_FILE_INPUT];
1270 }
1271 radeon_llvm_dispose(&radeon_llvm_ctx);
1272 }
1273 #endif
1274 /* End of LLVM backend setup */
1275
1276 if (!use_llvm) {
1277 ctx.file_offset[TGSI_FILE_OUTPUT] =
1278 ctx.file_offset[TGSI_FILE_INPUT] +
1279 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1280 }
1281 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1282 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1283
1284 /* Outside the GPR range. This will be translated to one of the
1285 * kcache banks later. */
1286 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1287
1288 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1289 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1290 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1291 ctx.temp_reg = ctx.bc->ar_reg + 1;
1292
1293 ctx.nliterals = 0;
1294 ctx.literals = NULL;
1295 shader->fs_write_all = FALSE;
1296 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1297 tgsi_parse_token(&ctx.parse);
1298 switch (ctx.parse.FullToken.Token.Type) {
1299 case TGSI_TOKEN_TYPE_IMMEDIATE:
1300 immediate = &ctx.parse.FullToken.FullImmediate;
1301 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1302 if(ctx.literals == NULL) {
1303 r = -ENOMEM;
1304 goto out_err;
1305 }
1306 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1307 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1308 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1309 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1310 ctx.nliterals++;
1311 break;
1312 case TGSI_TOKEN_TYPE_DECLARATION:
1313 r = tgsi_declaration(&ctx);
1314 if (r)
1315 goto out_err;
1316 break;
1317 case TGSI_TOKEN_TYPE_INSTRUCTION:
1318 break;
1319 case TGSI_TOKEN_TYPE_PROPERTY:
1320 property = &ctx.parse.FullToken.FullProperty;
1321 switch (property->Property.PropertyName) {
1322 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1323 if (property->u[0].Data == 1)
1324 shader->fs_write_all = TRUE;
1325 break;
1326 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1327 if (property->u[0].Data == 1)
1328 shader->vs_prohibit_ucps = TRUE;
1329 break;
1330 }
1331 break;
1332 default:
1333 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1334 r = -EINVAL;
1335 goto out_err;
1336 }
1337 }
1338
1339 if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
1340 shader->nr_ps_max_color_exports = 8;
1341
1342 if (ctx.fragcoord_input >= 0) {
1343 if (ctx.bc->chip_class == CAYMAN) {
1344 for (j = 0 ; j < 4; j++) {
1345 struct r600_bytecode_alu alu;
1346 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1347 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1348 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1349 alu.src[0].chan = 3;
1350
1351 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1352 alu.dst.chan = j;
1353 alu.dst.write = (j == 3);
1354 alu.last = 1;
1355 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1356 return r;
1357 }
1358 } else {
1359 struct r600_bytecode_alu alu;
1360 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1361 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1362 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1363 alu.src[0].chan = 3;
1364
1365 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1366 alu.dst.chan = 3;
1367 alu.dst.write = 1;
1368 alu.last = 1;
1369 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1370 return r;
1371 }
1372 }
1373
1374 if (shader->two_side && ctx.colors_used) {
1375 if ((r = process_twoside_color_inputs(&ctx)))
1376 return r;
1377 }
1378
1379 tgsi_parse_init(&ctx.parse, tokens);
1380 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1381 tgsi_parse_token(&ctx.parse);
1382 switch (ctx.parse.FullToken.Token.Type) {
1383 case TGSI_TOKEN_TYPE_INSTRUCTION:
1384 if (use_llvm) {
1385 continue;
1386 }
1387 r = tgsi_is_supported(&ctx);
1388 if (r)
1389 goto out_err;
1390 ctx.max_driver_temp_used = 0;
1391 /* reserve first tmp for everyone */
1392 r600_get_temp(&ctx);
1393
1394 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1395 if ((r = tgsi_split_constant(&ctx)))
1396 goto out_err;
1397 if ((r = tgsi_split_literal_constant(&ctx)))
1398 goto out_err;
1399 if (ctx.bc->chip_class == CAYMAN)
1400 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1401 else if (ctx.bc->chip_class >= EVERGREEN)
1402 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1403 else
1404 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1405 r = ctx.inst_info->process(&ctx);
1406 if (r)
1407 goto out_err;
1408 break;
1409 default:
1410 break;
1411 }
1412 }
1413
1414 /* Get instructions if we are using the LLVM backend. */
1415 if (use_llvm) {
1416 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1417 FREE(inst_bytes);
1418 }
1419
1420 noutput = shader->noutput;
1421
1422 if (ctx.clip_vertex_write) {
1423 /* need to convert a clipvertex write into clipdistance writes and not export
1424 the clip vertex anymore */
1425
1426 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1427 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1428 shader->output[noutput].gpr = ctx.temp_reg;
1429 noutput++;
1430 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1431 shader->output[noutput].gpr = ctx.temp_reg+1;
1432 noutput++;
1433
1434 /* reset spi_sid for clipvertex output to avoid confusing spi */
1435 shader->output[ctx.cv_output].spi_sid = 0;
1436
1437 shader->clip_dist_write = 0xFF;
1438
1439 for (i = 0; i < 8; i++) {
1440 int oreg = i >> 2;
1441 int ochan = i & 3;
1442
1443 for (j = 0; j < 4; j++) {
1444 struct r600_bytecode_alu alu;
1445 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1446 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1447 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1448 alu.src[0].chan = j;
1449
1450 alu.src[1].sel = 512 + i;
1451 alu.src[1].kc_bank = 1;
1452 alu.src[1].chan = j;
1453
1454 alu.dst.sel = ctx.temp_reg + oreg;
1455 alu.dst.chan = j;
1456 alu.dst.write = (j == ochan);
1457 if (j == 3)
1458 alu.last = 1;
1459 r = r600_bytecode_add_alu(ctx.bc, &alu);
1460 if (r)
1461 return r;
1462 }
1463 }
1464 }
1465
1466 /* Add stream outputs. */
1467 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1468 for (i = 0; i < so.num_outputs; i++) {
1469 struct r600_bytecode_output output;
1470
1471 if (so.output[i].output_buffer >= 4) {
1472 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1473 so.output[i].output_buffer);
1474 r = -EINVAL;
1475 goto out_err;
1476 }
1477 if (so.output[i].dst_offset < so.output[i].start_component) {
1478 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1479 r = -EINVAL;
1480 goto out_err;
1481 }
1482
1483 memset(&output, 0, sizeof(struct r600_bytecode_output));
1484 output.gpr = shader->output[so.output[i].register_index].gpr;
1485 output.elem_size = 0;
1486 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1487 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1488 output.burst_count = 1;
1489 output.barrier = 1;
1490 /* array_size is an upper limit for the burst_count
1491 * with MEM_STREAM instructions */
1492 output.array_size = 0xFFF;
1493 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1494 if (ctx.bc->chip_class >= EVERGREEN) {
1495 switch (so.output[i].output_buffer) {
1496 case 0:
1497 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1498 break;
1499 case 1:
1500 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1501 break;
1502 case 2:
1503 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1504 break;
1505 case 3:
1506 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1507 break;
1508 }
1509 } else {
1510 switch (so.output[i].output_buffer) {
1511 case 0:
1512 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1513 break;
1514 case 1:
1515 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1516 break;
1517 case 2:
1518 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1519 break;
1520 case 3:
1521 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1522 break;
1523 }
1524 }
1525 r = r600_bytecode_add_output(ctx.bc, &output);
1526 if (r)
1527 goto out_err;
1528 }
1529 }
1530
1531 /* export output */
1532 for (i = 0, j = 0; i < noutput; i++, j++) {
1533 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1534 output[j].gpr = shader->output[i].gpr;
1535 output[j].elem_size = 3;
1536 output[j].swizzle_x = 0;
1537 output[j].swizzle_y = 1;
1538 output[j].swizzle_z = 2;
1539 output[j].swizzle_w = 3;
1540 output[j].burst_count = 1;
1541 output[j].barrier = 1;
1542 output[j].type = -1;
1543 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1544 switch (ctx.type) {
1545 case TGSI_PROCESSOR_VERTEX:
1546 switch (shader->output[i].name) {
1547 case TGSI_SEMANTIC_POSITION:
1548 output[j].array_base = next_pos_base++;
1549 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1550 break;
1551
1552 case TGSI_SEMANTIC_PSIZE:
1553 output[j].array_base = next_pos_base++;
1554 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1555 break;
1556 case TGSI_SEMANTIC_CLIPVERTEX:
1557 j--;
1558 break;
1559 case TGSI_SEMANTIC_CLIPDIST:
1560 output[j].array_base = next_pos_base++;
1561 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1562 /* spi_sid is 0 for clipdistance outputs that were generated
1563 * for clipvertex - we don't need to pass them to PS */
1564 if (shader->output[i].spi_sid) {
1565 j++;
1566 /* duplicate it as PARAM to pass to the pixel shader */
1567 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1568 output[j].array_base = next_param_base++;
1569 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1570 }
1571 break;
1572 case TGSI_SEMANTIC_FOG:
1573 output[j].swizzle_y = 4; /* 0 */
1574 output[j].swizzle_z = 4; /* 0 */
1575 output[j].swizzle_w = 5; /* 1 */
1576 break;
1577 }
1578 break;
1579 case TGSI_PROCESSOR_FRAGMENT:
1580 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1581 /* never export more colors than the number of CBs */
1582 if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
1583 /* skip export */
1584 j--;
1585 continue;
1586 }
1587 output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
1588 output[j].array_base = next_pixel_base++;
1589 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1590 shader->nr_ps_color_exports++;
1591 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1592 for (k = 1; k < rctx->nr_cbufs; k++) {
1593 j++;
1594 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1595 output[j].gpr = shader->output[i].gpr;
1596 output[j].elem_size = 3;
1597 output[j].swizzle_x = 0;
1598 output[j].swizzle_y = 1;
1599 output[j].swizzle_z = 2;
1600 output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
1601 output[j].burst_count = 1;
1602 output[j].barrier = 1;
1603 output[j].array_base = next_pixel_base++;
1604 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1605 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1606 shader->nr_ps_color_exports++;
1607 }
1608 }
1609 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1610 output[j].array_base = 61;
1611 output[j].swizzle_x = 2;
1612 output[j].swizzle_y = 7;
1613 output[j].swizzle_z = output[j].swizzle_w = 7;
1614 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1615 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1616 output[j].array_base = 61;
1617 output[j].swizzle_x = 7;
1618 output[j].swizzle_y = 1;
1619 output[j].swizzle_z = output[j].swizzle_w = 7;
1620 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1621 } else {
1622 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1623 r = -EINVAL;
1624 goto out_err;
1625 }
1626 break;
1627 default:
1628 R600_ERR("unsupported processor type %d\n", ctx.type);
1629 r = -EINVAL;
1630 goto out_err;
1631 }
1632
1633 if (output[j].type==-1) {
1634 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1635 output[j].array_base = next_param_base++;
1636 }
1637 }
1638
1639 /* add fake param output for vertex shader if no param is exported */
1640 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1641 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1642 output[j].gpr = 0;
1643 output[j].elem_size = 3;
1644 output[j].swizzle_x = 7;
1645 output[j].swizzle_y = 7;
1646 output[j].swizzle_z = 7;
1647 output[j].swizzle_w = 7;
1648 output[j].burst_count = 1;
1649 output[j].barrier = 1;
1650 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1651 output[j].array_base = 0;
1652 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1653 j++;
1654 }
1655
1656 /* add fake pixel export */
1657 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1658 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1659 output[j].gpr = 0;
1660 output[j].elem_size = 3;
1661 output[j].swizzle_x = 7;
1662 output[j].swizzle_y = 7;
1663 output[j].swizzle_z = 7;
1664 output[j].swizzle_w = 7;
1665 output[j].burst_count = 1;
1666 output[j].barrier = 1;
1667 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1668 output[j].array_base = 0;
1669 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1670 j++;
1671 }
1672
1673 noutput = j;
1674
1675 /* set export done on last export of each type */
1676 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1677 if (ctx.bc->chip_class < CAYMAN) {
1678 if (i == (noutput - 1)) {
1679 output[i].end_of_program = 1;
1680 }
1681 }
1682 if (!(output_done & (1 << output[i].type))) {
1683 output_done |= (1 << output[i].type);
1684 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1685 }
1686 }
1687 /* add output to bytecode */
1688 for (i = 0; i < noutput; i++) {
1689 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1690 if (r)
1691 goto out_err;
1692 }
1693 /* add program end */
1694 if (ctx.bc->chip_class == CAYMAN)
1695 cm_bytecode_add_cf_end(ctx.bc);
1696
1697 /* check GPR limit - we have 124 = 128 - 4
1698 * (4 are reserved as alu clause temporary registers) */
1699 if (ctx.bc->ngpr > 124) {
1700 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1701 r = -ENOMEM;
1702 goto out_err;
1703 }
1704
1705 free(ctx.literals);
1706 tgsi_parse_free(&ctx.parse);
1707 return 0;
1708 out_err:
1709 free(ctx.literals);
1710 tgsi_parse_free(&ctx.parse);
1711 return r;
1712 }
1713
tgsi_unsupported(struct r600_shader_ctx * ctx)1714 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1715 {
1716 R600_ERR("%s tgsi opcode unsupported\n",
1717 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1718 return -EINVAL;
1719 }
1720
/*
 * Handler that emits nothing: it ignores the context and always
 * reports success.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;

	return 0;
}
1725
r600_bytecode_src(struct r600_bytecode_alu_src * bc_src,const struct r600_shader_src * shader_src,unsigned chan)1726 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1727 const struct r600_shader_src *shader_src,
1728 unsigned chan)
1729 {
1730 bc_src->sel = shader_src->sel;
1731 bc_src->chan = shader_src->swizzle[chan];
1732 bc_src->neg = shader_src->neg;
1733 bc_src->abs = shader_src->abs;
1734 bc_src->rel = shader_src->rel;
1735 bc_src->value = shader_src->value[bc_src->chan];
1736 }
1737
r600_bytecode_src_set_abs(struct r600_bytecode_alu_src * bc_src)1738 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1739 {
1740 bc_src->abs = 1;
1741 bc_src->neg = 0;
1742 }
1743
r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src * bc_src)1744 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1745 {
1746 bc_src->neg = !bc_src->neg;
1747 }
1748
tgsi_dst(struct r600_shader_ctx * ctx,const struct tgsi_full_dst_register * tgsi_dst,unsigned swizzle,struct r600_bytecode_alu_dst * r600_dst)1749 static void tgsi_dst(struct r600_shader_ctx *ctx,
1750 const struct tgsi_full_dst_register *tgsi_dst,
1751 unsigned swizzle,
1752 struct r600_bytecode_alu_dst *r600_dst)
1753 {
1754 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1755
1756 r600_dst->sel = tgsi_dst->Register.Index;
1757 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1758 r600_dst->chan = swizzle;
1759 r600_dst->write = 1;
1760 if (tgsi_dst->Register.Indirect)
1761 r600_dst->rel = V_SQ_REL_RELATIVE;
1762 if (inst->Instruction.Saturate) {
1763 r600_dst->clamp = 1;
1764 }
1765 }
1766
/*
 * Return the index (0..3) of the highest channel enabled in writemask.
 * Only the low four bits are looked at; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X alone or no bit at all both give 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
1778
tgsi_op2_s(struct r600_shader_ctx * ctx,int swap,int trans_only)1779 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1780 {
1781 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1782 struct r600_bytecode_alu alu;
1783 int i, j, r;
1784 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1785
1786 for (i = 0; i < lasti + 1; i++) {
1787 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1788 continue;
1789
1790 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1791 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1792
1793 alu.inst = ctx->inst_info->r600_opcode;
1794 if (!swap) {
1795 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1796 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1797 }
1798 } else {
1799 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1800 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1801 }
1802 /* handle some special cases */
1803 switch (ctx->inst_info->tgsi_opcode) {
1804 case TGSI_OPCODE_SUB:
1805 r600_bytecode_src_toggle_neg(&alu.src[1]);
1806 break;
1807 case TGSI_OPCODE_ABS:
1808 r600_bytecode_src_set_abs(&alu.src[0]);
1809 break;
1810 default:
1811 break;
1812 }
1813 if (i == lasti || trans_only) {
1814 alu.last = 1;
1815 }
1816 r = r600_bytecode_add_alu(ctx->bc, &alu);
1817 if (r)
1818 return r;
1819 }
1820 return 0;
1821 }
1822
/* Per-channel ALU op: operands in TGSI order, normal grouping. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1827
/* Per-channel ALU op with the first two operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1832
/* Per-channel ALU op where every emitted op closes its own group. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1837
tgsi_ineg(struct r600_shader_ctx * ctx)1838 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1839 {
1840 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1841 struct r600_bytecode_alu alu;
1842 int i, r;
1843 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1844
1845 for (i = 0; i < lasti + 1; i++) {
1846
1847 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1848 continue;
1849 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1850 alu.inst = ctx->inst_info->r600_opcode;
1851
1852 alu.src[0].sel = V_SQ_ALU_SRC_0;
1853
1854 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1855
1856 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1857
1858 if (i == lasti) {
1859 alu.last = 1;
1860 }
1861 r = r600_bytecode_add_alu(ctx->bc, &alu);
1862 if (r)
1863 return r;
1864 }
1865 return 0;
1866
1867 }
1868
cayman_emit_float_instr(struct r600_shader_ctx * ctx)1869 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1870 {
1871 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1872 int i, j, r;
1873 struct r600_bytecode_alu alu;
1874 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1875
1876 for (i = 0 ; i < last_slot; i++) {
1877 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1878 alu.inst = ctx->inst_info->r600_opcode;
1879 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1880 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1881
1882 /* RSQ should take the absolute value of src */
1883 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
1884 r600_bytecode_src_set_abs(&alu.src[j]);
1885 }
1886 }
1887 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1888 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1889
1890 if (i == last_slot - 1)
1891 alu.last = 1;
1892 r = r600_bytecode_add_alu(ctx->bc, &alu);
1893 if (r)
1894 return r;
1895 }
1896 return 0;
1897 }
1898
cayman_mul_int_instr(struct r600_shader_ctx * ctx)1899 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1900 {
1901 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1902 int i, j, k, r;
1903 struct r600_bytecode_alu alu;
1904 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1905 for (k = 0; k < last_slot; k++) {
1906 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1907 continue;
1908
1909 for (i = 0 ; i < 4; i++) {
1910 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1911 alu.inst = ctx->inst_info->r600_opcode;
1912 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1913 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1914 }
1915 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1916 alu.dst.write = (i == k);
1917 if (i == 3)
1918 alu.last = 1;
1919 r = r600_bytecode_add_alu(ctx->bc, &alu);
1920 if (r)
1921 return r;
1922 }
1923 }
1924 return 0;
1925 }
1926
1927 /*
1928 * r600 - trunc to -PI..PI range
1929 * r700 - normalize by dividing by 2PI
1930 * see fdo bug 27901
1931 */
tgsi_setup_trig(struct r600_shader_ctx * ctx)1932 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1933 {
1934 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1935 static float double_pi = 3.1415926535 * 2;
1936 static float neg_pi = -3.1415926535;
1937
1938 int r;
1939 struct r600_bytecode_alu alu;
1940
1941 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1942 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1943 alu.is_op3 = 1;
1944
1945 alu.dst.chan = 0;
1946 alu.dst.sel = ctx->temp_reg;
1947 alu.dst.write = 1;
1948
1949 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1950
1951 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1952 alu.src[1].chan = 0;
1953 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1954 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1955 alu.src[2].chan = 0;
1956 alu.last = 1;
1957 r = r600_bytecode_add_alu(ctx->bc, &alu);
1958 if (r)
1959 return r;
1960
1961 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1962 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1963
1964 alu.dst.chan = 0;
1965 alu.dst.sel = ctx->temp_reg;
1966 alu.dst.write = 1;
1967
1968 alu.src[0].sel = ctx->temp_reg;
1969 alu.src[0].chan = 0;
1970 alu.last = 1;
1971 r = r600_bytecode_add_alu(ctx->bc, &alu);
1972 if (r)
1973 return r;
1974
1975 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1976 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1977 alu.is_op3 = 1;
1978
1979 alu.dst.chan = 0;
1980 alu.dst.sel = ctx->temp_reg;
1981 alu.dst.write = 1;
1982
1983 alu.src[0].sel = ctx->temp_reg;
1984 alu.src[0].chan = 0;
1985
1986 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1987 alu.src[1].chan = 0;
1988 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1989 alu.src[2].chan = 0;
1990
1991 if (ctx->bc->chip_class == R600) {
1992 alu.src[1].value = *(uint32_t *)&double_pi;
1993 alu.src[2].value = *(uint32_t *)&neg_pi;
1994 } else {
1995 alu.src[1].sel = V_SQ_ALU_SRC_1;
1996 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1997 alu.src[2].neg = 1;
1998 }
1999
2000 alu.last = 1;
2001 r = r600_bytecode_add_alu(ctx->bc, &alu);
2002 if (r)
2003 return r;
2004 return 0;
2005 }
2006
cayman_trig(struct r600_shader_ctx * ctx)2007 static int cayman_trig(struct r600_shader_ctx *ctx)
2008 {
2009 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2010 struct r600_bytecode_alu alu;
2011 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2012 int i, r;
2013
2014 r = tgsi_setup_trig(ctx);
2015 if (r)
2016 return r;
2017
2018
2019 for (i = 0; i < last_slot; i++) {
2020 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2021 alu.inst = ctx->inst_info->r600_opcode;
2022 alu.dst.chan = i;
2023
2024 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2025 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2026
2027 alu.src[0].sel = ctx->temp_reg;
2028 alu.src[0].chan = 0;
2029 if (i == last_slot - 1)
2030 alu.last = 1;
2031 r = r600_bytecode_add_alu(ctx->bc, &alu);
2032 if (r)
2033 return r;
2034 }
2035 return 0;
2036 }
2037
tgsi_trig(struct r600_shader_ctx * ctx)2038 static int tgsi_trig(struct r600_shader_ctx *ctx)
2039 {
2040 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2041 struct r600_bytecode_alu alu;
2042 int i, r;
2043 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2044
2045 r = tgsi_setup_trig(ctx);
2046 if (r)
2047 return r;
2048
2049 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2050 alu.inst = ctx->inst_info->r600_opcode;
2051 alu.dst.chan = 0;
2052 alu.dst.sel = ctx->temp_reg;
2053 alu.dst.write = 1;
2054
2055 alu.src[0].sel = ctx->temp_reg;
2056 alu.src[0].chan = 0;
2057 alu.last = 1;
2058 r = r600_bytecode_add_alu(ctx->bc, &alu);
2059 if (r)
2060 return r;
2061
2062 /* replicate result */
2063 for (i = 0; i < lasti + 1; i++) {
2064 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2065 continue;
2066
2067 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2068 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2069
2070 alu.src[0].sel = ctx->temp_reg;
2071 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2072 if (i == lasti)
2073 alu.last = 1;
2074 r = r600_bytecode_add_alu(ctx->bc, &alu);
2075 if (r)
2076 return r;
2077 }
2078 return 0;
2079 }
2080
tgsi_scs(struct r600_shader_ctx * ctx)2081 static int tgsi_scs(struct r600_shader_ctx *ctx)
2082 {
2083 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2084 struct r600_bytecode_alu alu;
2085 int i, r;
2086
2087 /* We'll only need the trig stuff if we are going to write to the
2088 * X or Y components of the destination vector.
2089 */
2090 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2091 r = tgsi_setup_trig(ctx);
2092 if (r)
2093 return r;
2094 }
2095
2096 /* dst.x = COS */
2097 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2098 if (ctx->bc->chip_class == CAYMAN) {
2099 for (i = 0 ; i < 3; i++) {
2100 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2102 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2103
2104 if (i == 0)
2105 alu.dst.write = 1;
2106 else
2107 alu.dst.write = 0;
2108 alu.src[0].sel = ctx->temp_reg;
2109 alu.src[0].chan = 0;
2110 if (i == 2)
2111 alu.last = 1;
2112 r = r600_bytecode_add_alu(ctx->bc, &alu);
2113 if (r)
2114 return r;
2115 }
2116 } else {
2117 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2118 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2119 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2120
2121 alu.src[0].sel = ctx->temp_reg;
2122 alu.src[0].chan = 0;
2123 alu.last = 1;
2124 r = r600_bytecode_add_alu(ctx->bc, &alu);
2125 if (r)
2126 return r;
2127 }
2128 }
2129
2130 /* dst.y = SIN */
2131 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2132 if (ctx->bc->chip_class == CAYMAN) {
2133 for (i = 0 ; i < 3; i++) {
2134 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2135 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2136 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2137 if (i == 1)
2138 alu.dst.write = 1;
2139 else
2140 alu.dst.write = 0;
2141 alu.src[0].sel = ctx->temp_reg;
2142 alu.src[0].chan = 0;
2143 if (i == 2)
2144 alu.last = 1;
2145 r = r600_bytecode_add_alu(ctx->bc, &alu);
2146 if (r)
2147 return r;
2148 }
2149 } else {
2150 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2151 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2152 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2153
2154 alu.src[0].sel = ctx->temp_reg;
2155 alu.src[0].chan = 0;
2156 alu.last = 1;
2157 r = r600_bytecode_add_alu(ctx->bc, &alu);
2158 if (r)
2159 return r;
2160 }
2161 }
2162
2163 /* dst.z = 0.0; */
2164 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2165 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2166
2167 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2168
2169 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2170
2171 alu.src[0].sel = V_SQ_ALU_SRC_0;
2172 alu.src[0].chan = 0;
2173
2174 alu.last = 1;
2175
2176 r = r600_bytecode_add_alu(ctx->bc, &alu);
2177 if (r)
2178 return r;
2179 }
2180
2181 /* dst.w = 1.0; */
2182 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2183 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2184
2185 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2186
2187 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2188
2189 alu.src[0].sel = V_SQ_ALU_SRC_1;
2190 alu.src[0].chan = 0;
2191
2192 alu.last = 1;
2193
2194 r = r600_bytecode_add_alu(ctx->bc, &alu);
2195 if (r)
2196 return r;
2197 }
2198
2199 return 0;
2200 }
2201
tgsi_kill(struct r600_shader_ctx * ctx)2202 static int tgsi_kill(struct r600_shader_ctx *ctx)
2203 {
2204 struct r600_bytecode_alu alu;
2205 int i, r;
2206
2207 for (i = 0; i < 4; i++) {
2208 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2209 alu.inst = ctx->inst_info->r600_opcode;
2210
2211 alu.dst.chan = i;
2212
2213 alu.src[0].sel = V_SQ_ALU_SRC_0;
2214
2215 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2216 alu.src[1].sel = V_SQ_ALU_SRC_1;
2217 alu.src[1].neg = 1;
2218 } else {
2219 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2220 }
2221 if (i == 3) {
2222 alu.last = 1;
2223 }
2224 r = r600_bytecode_add_alu(ctx->bc, &alu);
2225 if (r)
2226 return r;
2227 }
2228
2229 /* kill must be last in ALU */
2230 ctx->bc->force_add_cf = 1;
2231 ctx->shader->uses_kill = TRUE;
2232 return 0;
2233 }
2234
tgsi_lit(struct r600_shader_ctx * ctx)2235 static int tgsi_lit(struct r600_shader_ctx *ctx)
2236 {
2237 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2238 struct r600_bytecode_alu alu;
2239 int r;
2240
2241 /* tmp.x = max(src.y, 0.0) */
2242 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2243 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2244 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2245 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2246 alu.src[1].chan = 1;
2247
2248 alu.dst.sel = ctx->temp_reg;
2249 alu.dst.chan = 0;
2250 alu.dst.write = 1;
2251
2252 alu.last = 1;
2253 r = r600_bytecode_add_alu(ctx->bc, &alu);
2254 if (r)
2255 return r;
2256
2257 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2258 {
2259 int chan;
2260 int sel;
2261 int i;
2262
2263 if (ctx->bc->chip_class == CAYMAN) {
2264 for (i = 0; i < 3; i++) {
2265 /* tmp.z = log(tmp.x) */
2266 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2267 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2268 alu.src[0].sel = ctx->temp_reg;
2269 alu.src[0].chan = 0;
2270 alu.dst.sel = ctx->temp_reg;
2271 alu.dst.chan = i;
2272 if (i == 2) {
2273 alu.dst.write = 1;
2274 alu.last = 1;
2275 } else
2276 alu.dst.write = 0;
2277
2278 r = r600_bytecode_add_alu(ctx->bc, &alu);
2279 if (r)
2280 return r;
2281 }
2282 } else {
2283 /* tmp.z = log(tmp.x) */
2284 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2286 alu.src[0].sel = ctx->temp_reg;
2287 alu.src[0].chan = 0;
2288 alu.dst.sel = ctx->temp_reg;
2289 alu.dst.chan = 2;
2290 alu.dst.write = 1;
2291 alu.last = 1;
2292 r = r600_bytecode_add_alu(ctx->bc, &alu);
2293 if (r)
2294 return r;
2295 }
2296
2297 chan = alu.dst.chan;
2298 sel = alu.dst.sel;
2299
2300 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2301 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2302 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2303 alu.src[0].sel = sel;
2304 alu.src[0].chan = chan;
2305 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2306 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2307 alu.dst.sel = ctx->temp_reg;
2308 alu.dst.chan = 0;
2309 alu.dst.write = 1;
2310 alu.is_op3 = 1;
2311 alu.last = 1;
2312 r = r600_bytecode_add_alu(ctx->bc, &alu);
2313 if (r)
2314 return r;
2315
2316 if (ctx->bc->chip_class == CAYMAN) {
2317 for (i = 0; i < 3; i++) {
2318 /* dst.z = exp(tmp.x) */
2319 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2320 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2321 alu.src[0].sel = ctx->temp_reg;
2322 alu.src[0].chan = 0;
2323 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2324 if (i == 2) {
2325 alu.dst.write = 1;
2326 alu.last = 1;
2327 } else
2328 alu.dst.write = 0;
2329 r = r600_bytecode_add_alu(ctx->bc, &alu);
2330 if (r)
2331 return r;
2332 }
2333 } else {
2334 /* dst.z = exp(tmp.x) */
2335 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2337 alu.src[0].sel = ctx->temp_reg;
2338 alu.src[0].chan = 0;
2339 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2340 alu.last = 1;
2341 r = r600_bytecode_add_alu(ctx->bc, &alu);
2342 if (r)
2343 return r;
2344 }
2345 }
2346
2347 /* dst.x, <- 1.0 */
2348 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2349 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2350 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2351 alu.src[0].chan = 0;
2352 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2353 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2354 r = r600_bytecode_add_alu(ctx->bc, &alu);
2355 if (r)
2356 return r;
2357
2358 /* dst.y = max(src.x, 0.0) */
2359 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2360 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2361 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2362 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2363 alu.src[1].chan = 0;
2364 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2365 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2366 r = r600_bytecode_add_alu(ctx->bc, &alu);
2367 if (r)
2368 return r;
2369
2370 /* dst.w, <- 1.0 */
2371 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2372 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2373 alu.src[0].sel = V_SQ_ALU_SRC_1;
2374 alu.src[0].chan = 0;
2375 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2376 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2377 alu.last = 1;
2378 r = r600_bytecode_add_alu(ctx->bc, &alu);
2379 if (r)
2380 return r;
2381
2382 return 0;
2383 }
2384
tgsi_rsq(struct r600_shader_ctx * ctx)2385 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2386 {
2387 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2388 struct r600_bytecode_alu alu;
2389 int i, r;
2390
2391 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2392
2393 /* XXX:
2394 * For state trackers other than OpenGL, we'll want to use
2395 * _RECIPSQRT_IEEE instead.
2396 */
2397 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2398
2399 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2400 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2401 r600_bytecode_src_set_abs(&alu.src[i]);
2402 }
2403 alu.dst.sel = ctx->temp_reg;
2404 alu.dst.write = 1;
2405 alu.last = 1;
2406 r = r600_bytecode_add_alu(ctx->bc, &alu);
2407 if (r)
2408 return r;
2409 /* replicate result */
2410 return tgsi_helper_tempx_replicate(ctx);
2411 }
2412
tgsi_helper_tempx_replicate(struct r600_shader_ctx * ctx)2413 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2414 {
2415 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2416 struct r600_bytecode_alu alu;
2417 int i, r;
2418
2419 for (i = 0; i < 4; i++) {
2420 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2421 alu.src[0].sel = ctx->temp_reg;
2422 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2423 alu.dst.chan = i;
2424 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2425 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2426 if (i == 3)
2427 alu.last = 1;
2428 r = r600_bytecode_add_alu(ctx->bc, &alu);
2429 if (r)
2430 return r;
2431 }
2432 return 0;
2433 }
2434
tgsi_trans_srcx_replicate(struct r600_shader_ctx * ctx)2435 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2436 {
2437 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2438 struct r600_bytecode_alu alu;
2439 int i, r;
2440
2441 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2442 alu.inst = ctx->inst_info->r600_opcode;
2443 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2444 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2445 }
2446 alu.dst.sel = ctx->temp_reg;
2447 alu.dst.write = 1;
2448 alu.last = 1;
2449 r = r600_bytecode_add_alu(ctx->bc, &alu);
2450 if (r)
2451 return r;
2452 /* replicate result */
2453 return tgsi_helper_tempx_replicate(ctx);
2454 }
2455
cayman_pow(struct r600_shader_ctx * ctx)2456 static int cayman_pow(struct r600_shader_ctx *ctx)
2457 {
2458 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2459 int i, r;
2460 struct r600_bytecode_alu alu;
2461 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2462
2463 for (i = 0; i < 3; i++) {
2464 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2466 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2467 alu.dst.sel = ctx->temp_reg;
2468 alu.dst.chan = i;
2469 alu.dst.write = 1;
2470 if (i == 2)
2471 alu.last = 1;
2472 r = r600_bytecode_add_alu(ctx->bc, &alu);
2473 if (r)
2474 return r;
2475 }
2476
2477 /* b * LOG2(a) */
2478 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2479 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2480 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2481 alu.src[1].sel = ctx->temp_reg;
2482 alu.dst.sel = ctx->temp_reg;
2483 alu.dst.write = 1;
2484 alu.last = 1;
2485 r = r600_bytecode_add_alu(ctx->bc, &alu);
2486 if (r)
2487 return r;
2488
2489 for (i = 0; i < last_slot; i++) {
2490 /* POW(a,b) = EXP2(b * LOG2(a))*/
2491 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2492 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2493 alu.src[0].sel = ctx->temp_reg;
2494
2495 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2496 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2497 if (i == last_slot - 1)
2498 alu.last = 1;
2499 r = r600_bytecode_add_alu(ctx->bc, &alu);
2500 if (r)
2501 return r;
2502 }
2503 return 0;
2504 }
2505
tgsi_pow(struct r600_shader_ctx * ctx)2506 static int tgsi_pow(struct r600_shader_ctx *ctx)
2507 {
2508 struct r600_bytecode_alu alu;
2509 int r;
2510
2511 /* LOG2(a) */
2512 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2514 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2515 alu.dst.sel = ctx->temp_reg;
2516 alu.dst.write = 1;
2517 alu.last = 1;
2518 r = r600_bytecode_add_alu(ctx->bc, &alu);
2519 if (r)
2520 return r;
2521 /* b * LOG2(a) */
2522 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2524 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2525 alu.src[1].sel = ctx->temp_reg;
2526 alu.dst.sel = ctx->temp_reg;
2527 alu.dst.write = 1;
2528 alu.last = 1;
2529 r = r600_bytecode_add_alu(ctx->bc, &alu);
2530 if (r)
2531 return r;
2532 /* POW(a,b) = EXP2(b * LOG2(a))*/
2533 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2534 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2535 alu.src[0].sel = ctx->temp_reg;
2536 alu.dst.sel = ctx->temp_reg;
2537 alu.dst.write = 1;
2538 alu.last = 1;
2539 r = r600_bytecode_add_alu(ctx->bc, &alu);
2540 if (r)
2541 return r;
2542 return tgsi_helper_tempx_replicate(ctx);
2543 }
2544
tgsi_divmod(struct r600_shader_ctx * ctx,int mod,int signed_op)2545 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2546 {
2547 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2548 struct r600_bytecode_alu alu;
2549 int i, r, j;
2550 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2551 int tmp0 = ctx->temp_reg;
2552 int tmp1 = r600_get_temp(ctx);
2553 int tmp2 = r600_get_temp(ctx);
2554 int tmp3 = r600_get_temp(ctx);
2555 /* Unsigned path:
2556 *
2557 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2558 *
2559 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2560 * 2. tmp0.z = lo (tmp0.x * src2)
2561 * 3. tmp0.w = -tmp0.z
2562 * 4. tmp0.y = hi (tmp0.x * src2)
2563 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2564 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2565 * 7. tmp1.x = tmp0.x - tmp0.w
2566 * 8. tmp1.y = tmp0.x + tmp0.w
2567 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2568 * 10. tmp0.z = hi(tmp0.x * src1) = q
2569 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2570 *
2571 * 12. tmp0.w = src1 - tmp0.y = r
2572 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2573 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2574 *
2575 * if DIV
2576 *
2577 * 15. tmp1.z = tmp0.z + 1 = q + 1
2578 * 16. tmp1.w = tmp0.z - 1 = q - 1
2579 *
2580 * else MOD
2581 *
2582 * 15. tmp1.z = tmp0.w - src2 = r - src2
2583 * 16. tmp1.w = tmp0.w + src2 = r + src2
2584 *
2585 * endif
2586 *
2587 * 17. tmp1.x = tmp1.x & tmp1.y
2588 *
2589 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2590 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2591 *
2592 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2593 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2594 *
2595 * Signed path:
2596 *
2597 * Same as unsigned, using abs values of the operands,
2598 * and fixing the sign of the result in the end.
2599 */
2600
2601 for (i = 0; i < 4; i++) {
2602 if (!(write_mask & (1<<i)))
2603 continue;
2604
2605 if (signed_op) {
2606
2607 /* tmp2.x = -src0 */
2608 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2609 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2610
2611 alu.dst.sel = tmp2;
2612 alu.dst.chan = 0;
2613 alu.dst.write = 1;
2614
2615 alu.src[0].sel = V_SQ_ALU_SRC_0;
2616
2617 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2618
2619 alu.last = 1;
2620 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2621 return r;
2622
2623 /* tmp2.y = -src1 */
2624 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2625 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2626
2627 alu.dst.sel = tmp2;
2628 alu.dst.chan = 1;
2629 alu.dst.write = 1;
2630
2631 alu.src[0].sel = V_SQ_ALU_SRC_0;
2632
2633 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2634
2635 alu.last = 1;
2636 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2637 return r;
2638
2639 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2640 /* it will be a sign of the quotient */
2641 if (!mod) {
2642
2643 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2644 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2645
2646 alu.dst.sel = tmp2;
2647 alu.dst.chan = 2;
2648 alu.dst.write = 1;
2649
2650 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2651 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2652
2653 alu.last = 1;
2654 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2655 return r;
2656 }
2657
2658 /* tmp2.x = |src0| */
2659 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2660 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2661 alu.is_op3 = 1;
2662
2663 alu.dst.sel = tmp2;
2664 alu.dst.chan = 0;
2665 alu.dst.write = 1;
2666
2667 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2668 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2669 alu.src[2].sel = tmp2;
2670 alu.src[2].chan = 0;
2671
2672 alu.last = 1;
2673 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2674 return r;
2675
2676 /* tmp2.y = |src1| */
2677 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2678 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2679 alu.is_op3 = 1;
2680
2681 alu.dst.sel = tmp2;
2682 alu.dst.chan = 1;
2683 alu.dst.write = 1;
2684
2685 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2686 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2687 alu.src[2].sel = tmp2;
2688 alu.src[2].chan = 1;
2689
2690 alu.last = 1;
2691 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2692 return r;
2693
2694 }
2695
2696 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2697 if (ctx->bc->chip_class == CAYMAN) {
2698 /* tmp3.x = u2f(src2) */
2699 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2700 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2701
2702 alu.dst.sel = tmp3;
2703 alu.dst.chan = 0;
2704 alu.dst.write = 1;
2705
2706 if (signed_op) {
2707 alu.src[0].sel = tmp2;
2708 alu.src[0].chan = 1;
2709 } else {
2710 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2711 }
2712
2713 alu.last = 1;
2714 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2715 return r;
2716
2717 /* tmp0.x = recip(tmp3.x) */
2718 for (j = 0 ; j < 3; j++) {
2719 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2720 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2721
2722 alu.dst.sel = tmp0;
2723 alu.dst.chan = j;
2724 alu.dst.write = (j == 0);
2725
2726 alu.src[0].sel = tmp3;
2727 alu.src[0].chan = 0;
2728
2729 if (j == 2)
2730 alu.last = 1;
2731 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2732 return r;
2733 }
2734
2735 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2736 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2737
2738 alu.src[0].sel = tmp0;
2739 alu.src[0].chan = 0;
2740
2741 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2742 alu.src[1].value = 0x4f800000;
2743
2744 alu.dst.sel = tmp3;
2745 alu.dst.write = 1;
2746 alu.last = 1;
2747 r = r600_bytecode_add_alu(ctx->bc, &alu);
2748 if (r)
2749 return r;
2750
2751 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2753
2754 alu.dst.sel = tmp0;
2755 alu.dst.chan = 0;
2756 alu.dst.write = 1;
2757
2758 alu.src[0].sel = tmp3;
2759 alu.src[0].chan = 0;
2760
2761 alu.last = 1;
2762 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2763 return r;
2764
2765 } else {
2766 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2767 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2768
2769 alu.dst.sel = tmp0;
2770 alu.dst.chan = 0;
2771 alu.dst.write = 1;
2772
2773 if (signed_op) {
2774 alu.src[0].sel = tmp2;
2775 alu.src[0].chan = 1;
2776 } else {
2777 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2778 }
2779
2780 alu.last = 1;
2781 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2782 return r;
2783 }
2784
2785 /* 2. tmp0.z = lo (tmp0.x * src2) */
2786 if (ctx->bc->chip_class == CAYMAN) {
2787 for (j = 0 ; j < 4; j++) {
2788 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2789 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2790
2791 alu.dst.sel = tmp0;
2792 alu.dst.chan = j;
2793 alu.dst.write = (j == 2);
2794
2795 alu.src[0].sel = tmp0;
2796 alu.src[0].chan = 0;
2797 if (signed_op) {
2798 alu.src[1].sel = tmp2;
2799 alu.src[1].chan = 1;
2800 } else {
2801 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2802 }
2803
2804 alu.last = (j == 3);
2805 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2806 return r;
2807 }
2808 } else {
2809 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2810 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2811
2812 alu.dst.sel = tmp0;
2813 alu.dst.chan = 2;
2814 alu.dst.write = 1;
2815
2816 alu.src[0].sel = tmp0;
2817 alu.src[0].chan = 0;
2818 if (signed_op) {
2819 alu.src[1].sel = tmp2;
2820 alu.src[1].chan = 1;
2821 } else {
2822 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2823 }
2824
2825 alu.last = 1;
2826 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2827 return r;
2828 }
2829
2830 /* 3. tmp0.w = -tmp0.z */
2831 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2832 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2833
2834 alu.dst.sel = tmp0;
2835 alu.dst.chan = 3;
2836 alu.dst.write = 1;
2837
2838 alu.src[0].sel = V_SQ_ALU_SRC_0;
2839 alu.src[1].sel = tmp0;
2840 alu.src[1].chan = 2;
2841
2842 alu.last = 1;
2843 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2844 return r;
2845
2846 /* 4. tmp0.y = hi (tmp0.x * src2) */
2847 if (ctx->bc->chip_class == CAYMAN) {
2848 for (j = 0 ; j < 4; j++) {
2849 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2850 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2851
2852 alu.dst.sel = tmp0;
2853 alu.dst.chan = j;
2854 alu.dst.write = (j == 1);
2855
2856 alu.src[0].sel = tmp0;
2857 alu.src[0].chan = 0;
2858
2859 if (signed_op) {
2860 alu.src[1].sel = tmp2;
2861 alu.src[1].chan = 1;
2862 } else {
2863 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2864 }
2865 alu.last = (j == 3);
2866 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2867 return r;
2868 }
2869 } else {
2870 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2871 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2872
2873 alu.dst.sel = tmp0;
2874 alu.dst.chan = 1;
2875 alu.dst.write = 1;
2876
2877 alu.src[0].sel = tmp0;
2878 alu.src[0].chan = 0;
2879
2880 if (signed_op) {
2881 alu.src[1].sel = tmp2;
2882 alu.src[1].chan = 1;
2883 } else {
2884 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2885 }
2886
2887 alu.last = 1;
2888 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2889 return r;
2890 }
2891
2892 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2893 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2894 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2895 alu.is_op3 = 1;
2896
2897 alu.dst.sel = tmp0;
2898 alu.dst.chan = 2;
2899 alu.dst.write = 1;
2900
2901 alu.src[0].sel = tmp0;
2902 alu.src[0].chan = 1;
2903 alu.src[1].sel = tmp0;
2904 alu.src[1].chan = 3;
2905 alu.src[2].sel = tmp0;
2906 alu.src[2].chan = 2;
2907
2908 alu.last = 1;
2909 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2910 return r;
2911
2912 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2913 if (ctx->bc->chip_class == CAYMAN) {
2914 for (j = 0 ; j < 4; j++) {
2915 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2916 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2917
2918 alu.dst.sel = tmp0;
2919 alu.dst.chan = j;
2920 alu.dst.write = (j == 3);
2921
2922 alu.src[0].sel = tmp0;
2923 alu.src[0].chan = 2;
2924
2925 alu.src[1].sel = tmp0;
2926 alu.src[1].chan = 0;
2927
2928 alu.last = (j == 3);
2929 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2930 return r;
2931 }
2932 } else {
2933 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2934 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2935
2936 alu.dst.sel = tmp0;
2937 alu.dst.chan = 3;
2938 alu.dst.write = 1;
2939
2940 alu.src[0].sel = tmp0;
2941 alu.src[0].chan = 2;
2942
2943 alu.src[1].sel = tmp0;
2944 alu.src[1].chan = 0;
2945
2946 alu.last = 1;
2947 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2948 return r;
2949 }
2950
2951 /* 7. tmp1.x = tmp0.x - tmp0.w */
2952 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2954
2955 alu.dst.sel = tmp1;
2956 alu.dst.chan = 0;
2957 alu.dst.write = 1;
2958
2959 alu.src[0].sel = tmp0;
2960 alu.src[0].chan = 0;
2961 alu.src[1].sel = tmp0;
2962 alu.src[1].chan = 3;
2963
2964 alu.last = 1;
2965 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2966 return r;
2967
2968 /* 8. tmp1.y = tmp0.x + tmp0.w */
2969 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2970 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2971
2972 alu.dst.sel = tmp1;
2973 alu.dst.chan = 1;
2974 alu.dst.write = 1;
2975
2976 alu.src[0].sel = tmp0;
2977 alu.src[0].chan = 0;
2978 alu.src[1].sel = tmp0;
2979 alu.src[1].chan = 3;
2980
2981 alu.last = 1;
2982 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2983 return r;
2984
2985 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2986 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2987 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2988 alu.is_op3 = 1;
2989
2990 alu.dst.sel = tmp0;
2991 alu.dst.chan = 0;
2992 alu.dst.write = 1;
2993
2994 alu.src[0].sel = tmp0;
2995 alu.src[0].chan = 1;
2996 alu.src[1].sel = tmp1;
2997 alu.src[1].chan = 1;
2998 alu.src[2].sel = tmp1;
2999 alu.src[2].chan = 0;
3000
3001 alu.last = 1;
3002 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3003 return r;
3004
3005 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
3006 if (ctx->bc->chip_class == CAYMAN) {
3007 for (j = 0 ; j < 4; j++) {
3008 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3009 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3010
3011 alu.dst.sel = tmp0;
3012 alu.dst.chan = j;
3013 alu.dst.write = (j == 2);
3014
3015 alu.src[0].sel = tmp0;
3016 alu.src[0].chan = 0;
3017
3018 if (signed_op) {
3019 alu.src[1].sel = tmp2;
3020 alu.src[1].chan = 0;
3021 } else {
3022 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3023 }
3024
3025 alu.last = (j == 3);
3026 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3027 return r;
3028 }
3029 } else {
3030 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3031 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3032
3033 alu.dst.sel = tmp0;
3034 alu.dst.chan = 2;
3035 alu.dst.write = 1;
3036
3037 alu.src[0].sel = tmp0;
3038 alu.src[0].chan = 0;
3039
3040 if (signed_op) {
3041 alu.src[1].sel = tmp2;
3042 alu.src[1].chan = 0;
3043 } else {
3044 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3045 }
3046
3047 alu.last = 1;
3048 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3049 return r;
3050 }
3051
3052 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3053 if (ctx->bc->chip_class == CAYMAN) {
3054 for (j = 0 ; j < 4; j++) {
3055 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3056 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3057
3058 alu.dst.sel = tmp0;
3059 alu.dst.chan = j;
3060 alu.dst.write = (j == 1);
3061
3062 if (signed_op) {
3063 alu.src[0].sel = tmp2;
3064 alu.src[0].chan = 1;
3065 } else {
3066 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3067 }
3068
3069 alu.src[1].sel = tmp0;
3070 alu.src[1].chan = 2;
3071
3072 alu.last = (j == 3);
3073 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3074 return r;
3075 }
3076 } else {
3077 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3079
3080 alu.dst.sel = tmp0;
3081 alu.dst.chan = 1;
3082 alu.dst.write = 1;
3083
3084 if (signed_op) {
3085 alu.src[0].sel = tmp2;
3086 alu.src[0].chan = 1;
3087 } else {
3088 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3089 }
3090
3091 alu.src[1].sel = tmp0;
3092 alu.src[1].chan = 2;
3093
3094 alu.last = 1;
3095 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3096 return r;
3097 }
3098
3099 /* 12. tmp0.w = src1 - tmp0.y = r */
3100 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3102
3103 alu.dst.sel = tmp0;
3104 alu.dst.chan = 3;
3105 alu.dst.write = 1;
3106
3107 if (signed_op) {
3108 alu.src[0].sel = tmp2;
3109 alu.src[0].chan = 0;
3110 } else {
3111 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3112 }
3113
3114 alu.src[1].sel = tmp0;
3115 alu.src[1].chan = 1;
3116
3117 alu.last = 1;
3118 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3119 return r;
3120
3121 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3122 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3123 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3124
3125 alu.dst.sel = tmp1;
3126 alu.dst.chan = 0;
3127 alu.dst.write = 1;
3128
3129 alu.src[0].sel = tmp0;
3130 alu.src[0].chan = 3;
3131 if (signed_op) {
3132 alu.src[1].sel = tmp2;
3133 alu.src[1].chan = 1;
3134 } else {
3135 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3136 }
3137
3138 alu.last = 1;
3139 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3140 return r;
3141
3142 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3143 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3144 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3145
3146 alu.dst.sel = tmp1;
3147 alu.dst.chan = 1;
3148 alu.dst.write = 1;
3149
3150 if (signed_op) {
3151 alu.src[0].sel = tmp2;
3152 alu.src[0].chan = 0;
3153 } else {
3154 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3155 }
3156
3157 alu.src[1].sel = tmp0;
3158 alu.src[1].chan = 1;
3159
3160 alu.last = 1;
3161 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3162 return r;
3163
3164 if (mod) { /* UMOD */
3165
3166 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3167 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3169
3170 alu.dst.sel = tmp1;
3171 alu.dst.chan = 2;
3172 alu.dst.write = 1;
3173
3174 alu.src[0].sel = tmp0;
3175 alu.src[0].chan = 3;
3176
3177 if (signed_op) {
3178 alu.src[1].sel = tmp2;
3179 alu.src[1].chan = 1;
3180 } else {
3181 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3182 }
3183
3184 alu.last = 1;
3185 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3186 return r;
3187
3188 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3189 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3190 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3191
3192 alu.dst.sel = tmp1;
3193 alu.dst.chan = 3;
3194 alu.dst.write = 1;
3195
3196 alu.src[0].sel = tmp0;
3197 alu.src[0].chan = 3;
3198 if (signed_op) {
3199 alu.src[1].sel = tmp2;
3200 alu.src[1].chan = 1;
3201 } else {
3202 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3203 }
3204
3205 alu.last = 1;
3206 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3207 return r;
3208
3209 } else { /* UDIV */
3210
3211 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3212 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3213 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3214
3215 alu.dst.sel = tmp1;
3216 alu.dst.chan = 2;
3217 alu.dst.write = 1;
3218
3219 alu.src[0].sel = tmp0;
3220 alu.src[0].chan = 2;
3221 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3222
3223 alu.last = 1;
3224 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3225 return r;
3226
3227 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3228 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3229 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3230
3231 alu.dst.sel = tmp1;
3232 alu.dst.chan = 3;
3233 alu.dst.write = 1;
3234
3235 alu.src[0].sel = tmp0;
3236 alu.src[0].chan = 2;
3237 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3238
3239 alu.last = 1;
3240 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3241 return r;
3242
3243 }
3244
3245 /* 17. tmp1.x = tmp1.x & tmp1.y */
3246 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3247 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3248
3249 alu.dst.sel = tmp1;
3250 alu.dst.chan = 0;
3251 alu.dst.write = 1;
3252
3253 alu.src[0].sel = tmp1;
3254 alu.src[0].chan = 0;
3255 alu.src[1].sel = tmp1;
3256 alu.src[1].chan = 1;
3257
3258 alu.last = 1;
3259 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3260 return r;
3261
3262 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3263 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3264 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3265 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3266 alu.is_op3 = 1;
3267
3268 alu.dst.sel = tmp0;
3269 alu.dst.chan = 2;
3270 alu.dst.write = 1;
3271
3272 alu.src[0].sel = tmp1;
3273 alu.src[0].chan = 0;
3274 alu.src[1].sel = tmp0;
3275 alu.src[1].chan = mod ? 3 : 2;
3276 alu.src[2].sel = tmp1;
3277 alu.src[2].chan = 2;
3278
3279 alu.last = 1;
3280 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3281 return r;
3282
3283 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3284 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3286 alu.is_op3 = 1;
3287
3288 if (signed_op) {
3289 alu.dst.sel = tmp0;
3290 alu.dst.chan = 2;
3291 alu.dst.write = 1;
3292 } else {
3293 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3294 }
3295
3296 alu.src[0].sel = tmp1;
3297 alu.src[0].chan = 1;
3298 alu.src[1].sel = tmp1;
3299 alu.src[1].chan = 3;
3300 alu.src[2].sel = tmp0;
3301 alu.src[2].chan = 2;
3302
3303 alu.last = 1;
3304 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3305 return r;
3306
3307 if (signed_op) {
3308
3309 /* fix the sign of the result */
3310
3311 if (mod) {
3312
3313 /* tmp0.x = -tmp0.z */
3314 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3315 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3316
3317 alu.dst.sel = tmp0;
3318 alu.dst.chan = 0;
3319 alu.dst.write = 1;
3320
3321 alu.src[0].sel = V_SQ_ALU_SRC_0;
3322 alu.src[1].sel = tmp0;
3323 alu.src[1].chan = 2;
3324
3325 alu.last = 1;
3326 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3327 return r;
3328
3329 /* sign of the remainder is the same as the sign of src0 */
3330 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3331 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3332 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3333 alu.is_op3 = 1;
3334
3335 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3336
3337 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3338 alu.src[1].sel = tmp0;
3339 alu.src[1].chan = 2;
3340 alu.src[2].sel = tmp0;
3341 alu.src[2].chan = 0;
3342
3343 alu.last = 1;
3344 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3345 return r;
3346
3347 } else {
3348
3349 /* tmp0.x = -tmp0.z */
3350 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3352
3353 alu.dst.sel = tmp0;
3354 alu.dst.chan = 0;
3355 alu.dst.write = 1;
3356
3357 alu.src[0].sel = V_SQ_ALU_SRC_0;
3358 alu.src[1].sel = tmp0;
3359 alu.src[1].chan = 2;
3360
3361 alu.last = 1;
3362 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3363 return r;
3364
3365 /* fix the quotient sign (same as the sign of src0*src1) */
3366 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3367 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3368 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3369 alu.is_op3 = 1;
3370
3371 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3372
3373 alu.src[0].sel = tmp2;
3374 alu.src[0].chan = 2;
3375 alu.src[1].sel = tmp0;
3376 alu.src[1].chan = 2;
3377 alu.src[2].sel = tmp0;
3378 alu.src[2].chan = 0;
3379
3380 alu.last = 1;
3381 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3382 return r;
3383 }
3384 }
3385 }
3386 return 0;
3387 }
3388
/*
 * TGSI UDIV: hand the instruction to tgsi_divmod(), asking for the quotient
 * (mod = 0) of unsigned operands (signed_op = 0).
 * NOTE(review): argument meaning is taken from how tgsi_divmod's body uses
 * `mod` and `signed_op`; confirm against its prototype.
 */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;
	const int signed_op = 0;

	return tgsi_divmod(ctx, mod, signed_op);
}
3393
/*
 * TGSI UMOD: hand the instruction to tgsi_divmod(), asking for the remainder
 * (mod = 1) of unsigned operands (signed_op = 0).
 * NOTE(review): argument meaning is taken from how tgsi_divmod's body uses
 * `mod` and `signed_op`; confirm against its prototype.
 */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;
	const int signed_op = 0;

	return tgsi_divmod(ctx, mod, signed_op);
}
3398
/*
 * TGSI IDIV: hand the instruction to tgsi_divmod(), asking for the quotient
 * (mod = 0) of signed operands (signed_op = 1).
 * NOTE(review): argument meaning is taken from how tgsi_divmod's body uses
 * `mod` and `signed_op`; confirm against its prototype.
 */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;
	const int signed_op = 1;

	return tgsi_divmod(ctx, mod, signed_op);
}
3403
/*
 * TGSI IMOD: hand the instruction to tgsi_divmod(), asking for the remainder
 * (mod = 1) of signed operands (signed_op = 1).
 * NOTE(review): argument meaning is taken from how tgsi_divmod's body uses
 * `mod` and `signed_op`; confirm against its prototype.
 */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;
	const int signed_op = 1;

	return tgsi_divmod(ctx, mod, signed_op);
}
3408
3409
tgsi_f2i(struct r600_shader_ctx * ctx)3410 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3411 {
3412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3413 struct r600_bytecode_alu alu;
3414 int i, r;
3415 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3416 int last_inst = tgsi_last_instruction(write_mask);
3417
3418 for (i = 0; i < 4; i++) {
3419 if (!(write_mask & (1<<i)))
3420 continue;
3421
3422 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3423 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3424
3425 alu.dst.sel = ctx->temp_reg;
3426 alu.dst.chan = i;
3427 alu.dst.write = 1;
3428
3429 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3430 if (i == last_inst)
3431 alu.last = 1;
3432 r = r600_bytecode_add_alu(ctx->bc, &alu);
3433 if (r)
3434 return r;
3435 }
3436
3437 for (i = 0; i < 4; i++) {
3438 if (!(write_mask & (1<<i)))
3439 continue;
3440
3441 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3442 alu.inst = ctx->inst_info->r600_opcode;
3443
3444 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3445
3446 alu.src[0].sel = ctx->temp_reg;
3447 alu.src[0].chan = i;
3448
3449 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3450 alu.last = 1;
3451 r = r600_bytecode_add_alu(ctx->bc, &alu);
3452 if (r)
3453 return r;
3454 }
3455
3456 return 0;
3457 }
3458
tgsi_iabs(struct r600_shader_ctx * ctx)3459 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3460 {
3461 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3462 struct r600_bytecode_alu alu;
3463 int i, r;
3464 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3465 int last_inst = tgsi_last_instruction(write_mask);
3466
3467 /* tmp = -src */
3468 for (i = 0; i < 4; i++) {
3469 if (!(write_mask & (1<<i)))
3470 continue;
3471
3472 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3473 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3474
3475 alu.dst.sel = ctx->temp_reg;
3476 alu.dst.chan = i;
3477 alu.dst.write = 1;
3478
3479 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3480 alu.src[0].sel = V_SQ_ALU_SRC_0;
3481
3482 if (i == last_inst)
3483 alu.last = 1;
3484 r = r600_bytecode_add_alu(ctx->bc, &alu);
3485 if (r)
3486 return r;
3487 }
3488
3489 /* dst = (src >= 0 ? src : tmp) */
3490 for (i = 0; i < 4; i++) {
3491 if (!(write_mask & (1<<i)))
3492 continue;
3493
3494 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3495 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3496 alu.is_op3 = 1;
3497 alu.dst.write = 1;
3498
3499 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3500
3501 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3502 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3503 alu.src[2].sel = ctx->temp_reg;
3504 alu.src[2].chan = i;
3505
3506 if (i == last_inst)
3507 alu.last = 1;
3508 r = r600_bytecode_add_alu(ctx->bc, &alu);
3509 if (r)
3510 return r;
3511 }
3512 return 0;
3513 }
3514
tgsi_issg(struct r600_shader_ctx * ctx)3515 static int tgsi_issg(struct r600_shader_ctx *ctx)
3516 {
3517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3518 struct r600_bytecode_alu alu;
3519 int i, r;
3520 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3521 int last_inst = tgsi_last_instruction(write_mask);
3522
3523 /* tmp = (src >= 0 ? src : -1) */
3524 for (i = 0; i < 4; i++) {
3525 if (!(write_mask & (1<<i)))
3526 continue;
3527
3528 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3530 alu.is_op3 = 1;
3531
3532 alu.dst.sel = ctx->temp_reg;
3533 alu.dst.chan = i;
3534 alu.dst.write = 1;
3535
3536 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3537 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3538 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3539
3540 if (i == last_inst)
3541 alu.last = 1;
3542 r = r600_bytecode_add_alu(ctx->bc, &alu);
3543 if (r)
3544 return r;
3545 }
3546
3547 /* dst = (tmp > 0 ? 1 : tmp) */
3548 for (i = 0; i < 4; i++) {
3549 if (!(write_mask & (1<<i)))
3550 continue;
3551
3552 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3554 alu.is_op3 = 1;
3555 alu.dst.write = 1;
3556
3557 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3558
3559 alu.src[0].sel = ctx->temp_reg;
3560 alu.src[0].chan = i;
3561
3562 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3563
3564 alu.src[2].sel = ctx->temp_reg;
3565 alu.src[2].chan = i;
3566
3567 if (i == last_inst)
3568 alu.last = 1;
3569 r = r600_bytecode_add_alu(ctx->bc, &alu);
3570 if (r)
3571 return r;
3572 }
3573 return 0;
3574 }
3575
3576
3577
tgsi_ssg(struct r600_shader_ctx * ctx)3578 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3579 {
3580 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3581 struct r600_bytecode_alu alu;
3582 int i, r;
3583
3584 /* tmp = (src > 0 ? 1 : src) */
3585 for (i = 0; i < 4; i++) {
3586 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3587 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3588 alu.is_op3 = 1;
3589
3590 alu.dst.sel = ctx->temp_reg;
3591 alu.dst.chan = i;
3592
3593 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3594 alu.src[1].sel = V_SQ_ALU_SRC_1;
3595 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3596
3597 if (i == 3)
3598 alu.last = 1;
3599 r = r600_bytecode_add_alu(ctx->bc, &alu);
3600 if (r)
3601 return r;
3602 }
3603
3604 /* dst = (-tmp > 0 ? -1 : tmp) */
3605 for (i = 0; i < 4; i++) {
3606 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3607 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3608 alu.is_op3 = 1;
3609 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3610
3611 alu.src[0].sel = ctx->temp_reg;
3612 alu.src[0].chan = i;
3613 alu.src[0].neg = 1;
3614
3615 alu.src[1].sel = V_SQ_ALU_SRC_1;
3616 alu.src[1].neg = 1;
3617
3618 alu.src[2].sel = ctx->temp_reg;
3619 alu.src[2].chan = i;
3620
3621 if (i == 3)
3622 alu.last = 1;
3623 r = r600_bytecode_add_alu(ctx->bc, &alu);
3624 if (r)
3625 return r;
3626 }
3627 return 0;
3628 }
3629
tgsi_helper_copy(struct r600_shader_ctx * ctx,struct tgsi_full_instruction * inst)3630 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3631 {
3632 struct r600_bytecode_alu alu;
3633 int i, r;
3634
3635 for (i = 0; i < 4; i++) {
3636 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3637 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3638 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3639 alu.dst.chan = i;
3640 } else {
3641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3642 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3643 alu.src[0].sel = ctx->temp_reg;
3644 alu.src[0].chan = i;
3645 }
3646 if (i == 3) {
3647 alu.last = 1;
3648 }
3649 r = r600_bytecode_add_alu(ctx->bc, &alu);
3650 if (r)
3651 return r;
3652 }
3653 return 0;
3654 }
3655
tgsi_op3(struct r600_shader_ctx * ctx)3656 static int tgsi_op3(struct r600_shader_ctx *ctx)
3657 {
3658 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3659 struct r600_bytecode_alu alu;
3660 int i, j, r;
3661 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3662
3663 for (i = 0; i < lasti + 1; i++) {
3664 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3665 continue;
3666
3667 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3668 alu.inst = ctx->inst_info->r600_opcode;
3669 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3670 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3671 }
3672
3673 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3674 alu.dst.chan = i;
3675 alu.dst.write = 1;
3676 alu.is_op3 = 1;
3677 if (i == lasti) {
3678 alu.last = 1;
3679 }
3680 r = r600_bytecode_add_alu(ctx->bc, &alu);
3681 if (r)
3682 return r;
3683 }
3684 return 0;
3685 }
3686
tgsi_dp(struct r600_shader_ctx * ctx)3687 static int tgsi_dp(struct r600_shader_ctx *ctx)
3688 {
3689 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3690 struct r600_bytecode_alu alu;
3691 int i, j, r;
3692
3693 for (i = 0; i < 4; i++) {
3694 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3695 alu.inst = ctx->inst_info->r600_opcode;
3696 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3697 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3698 }
3699
3700 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3701 alu.dst.chan = i;
3702 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3703 /* handle some special cases */
3704 switch (ctx->inst_info->tgsi_opcode) {
3705 case TGSI_OPCODE_DP2:
3706 if (i > 1) {
3707 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3708 alu.src[0].chan = alu.src[1].chan = 0;
3709 }
3710 break;
3711 case TGSI_OPCODE_DP3:
3712 if (i > 2) {
3713 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3714 alu.src[0].chan = alu.src[1].chan = 0;
3715 }
3716 break;
3717 case TGSI_OPCODE_DPH:
3718 if (i == 3) {
3719 alu.src[0].sel = V_SQ_ALU_SRC_1;
3720 alu.src[0].chan = 0;
3721 alu.src[0].neg = 0;
3722 }
3723 break;
3724 default:
3725 break;
3726 }
3727 if (i == 3) {
3728 alu.last = 1;
3729 }
3730 r = r600_bytecode_add_alu(ctx->bc, &alu);
3731 if (r)
3732 return r;
3733 }
3734 return 0;
3735 }
3736
tgsi_tex_src_requires_loading(struct r600_shader_ctx * ctx,unsigned index)3737 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3738 unsigned index)
3739 {
3740 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3741 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3742 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3743 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3744 ctx->src[index].neg || ctx->src[index].abs;
3745 }
3746
tgsi_tex_get_src_gpr(struct r600_shader_ctx * ctx,unsigned index)3747 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3748 unsigned index)
3749 {
3750 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3751 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3752 }
3753
tgsi_tex(struct r600_shader_ctx * ctx)3754 static int tgsi_tex(struct r600_shader_ctx *ctx)
3755 {
3756 static float one_point_five = 1.5f;
3757 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3758 struct r600_bytecode_tex tex;
3759 struct r600_bytecode_alu alu;
3760 unsigned src_gpr;
3761 int r, i, j;
3762 int opcode;
3763 /* Texture fetch instructions can only use gprs as source.
3764 * Also they cannot negate the source or take the absolute value */
3765 const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3766 tgsi_tex_src_requires_loading(ctx, 0);
3767 boolean src_loaded = FALSE;
3768 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3769 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3770
3771 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3772
3773 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3774 /* get offset values */
3775 if (inst->Texture.NumOffsets) {
3776 assert(inst->Texture.NumOffsets == 1);
3777
3778 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3779 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3780 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3781 }
3782 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3783 /* TGSI moves the sampler to src reg 3 for TXD */
3784 sampler_src_reg = 3;
3785
3786 for (i = 1; i < 3; i++) {
3787 /* set gradients h/v */
3788 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3789 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3790 SQ_TEX_INST_SET_GRADIENTS_V;
3791 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3792 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3793
3794 if (tgsi_tex_src_requires_loading(ctx, i)) {
3795 tex.src_gpr = r600_get_temp(ctx);
3796 tex.src_sel_x = 0;
3797 tex.src_sel_y = 1;
3798 tex.src_sel_z = 2;
3799 tex.src_sel_w = 3;
3800
3801 for (j = 0; j < 4; j++) {
3802 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3803 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3804 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3805 alu.dst.sel = tex.src_gpr;
3806 alu.dst.chan = j;
3807 if (j == 3)
3808 alu.last = 1;
3809 alu.dst.write = 1;
3810 r = r600_bytecode_add_alu(ctx->bc, &alu);
3811 if (r)
3812 return r;
3813 }
3814
3815 } else {
3816 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3817 tex.src_sel_x = ctx->src[i].swizzle[0];
3818 tex.src_sel_y = ctx->src[i].swizzle[1];
3819 tex.src_sel_z = ctx->src[i].swizzle[2];
3820 tex.src_sel_w = ctx->src[i].swizzle[3];
3821 tex.src_rel = ctx->src[i].rel;
3822 }
3823 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3824 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3825 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3826 tex.coord_type_x = 1;
3827 tex.coord_type_y = 1;
3828 tex.coord_type_z = 1;
3829 tex.coord_type_w = 1;
3830 }
3831 r = r600_bytecode_add_tex(ctx->bc, &tex);
3832 if (r)
3833 return r;
3834 }
3835 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3836 int out_chan;
3837 /* Add perspective divide */
3838 if (ctx->bc->chip_class == CAYMAN) {
3839 out_chan = 2;
3840 for (i = 0; i < 3; i++) {
3841 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3843 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3844
3845 alu.dst.sel = ctx->temp_reg;
3846 alu.dst.chan = i;
3847 if (i == 2)
3848 alu.last = 1;
3849 if (out_chan == i)
3850 alu.dst.write = 1;
3851 r = r600_bytecode_add_alu(ctx->bc, &alu);
3852 if (r)
3853 return r;
3854 }
3855
3856 } else {
3857 out_chan = 3;
3858 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3859 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3860 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3861
3862 alu.dst.sel = ctx->temp_reg;
3863 alu.dst.chan = out_chan;
3864 alu.last = 1;
3865 alu.dst.write = 1;
3866 r = r600_bytecode_add_alu(ctx->bc, &alu);
3867 if (r)
3868 return r;
3869 }
3870
3871 for (i = 0; i < 3; i++) {
3872 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3873 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3874 alu.src[0].sel = ctx->temp_reg;
3875 alu.src[0].chan = out_chan;
3876 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3877 alu.dst.sel = ctx->temp_reg;
3878 alu.dst.chan = i;
3879 alu.dst.write = 1;
3880 r = r600_bytecode_add_alu(ctx->bc, &alu);
3881 if (r)
3882 return r;
3883 }
3884 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3885 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3886 alu.src[0].sel = V_SQ_ALU_SRC_1;
3887 alu.src[0].chan = 0;
3888 alu.dst.sel = ctx->temp_reg;
3889 alu.dst.chan = 3;
3890 alu.last = 1;
3891 alu.dst.write = 1;
3892 r = r600_bytecode_add_alu(ctx->bc, &alu);
3893 if (r)
3894 return r;
3895 src_loaded = TRUE;
3896 src_gpr = ctx->temp_reg;
3897 }
3898
3899 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3900 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3901 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
3902 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
3903
3904 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3905 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3906
3907 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3908 for (i = 0; i < 4; i++) {
3909 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3910 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3911 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3912 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3913 alu.dst.sel = ctx->temp_reg;
3914 alu.dst.chan = i;
3915 if (i == 3)
3916 alu.last = 1;
3917 alu.dst.write = 1;
3918 r = r600_bytecode_add_alu(ctx->bc, &alu);
3919 if (r)
3920 return r;
3921 }
3922
3923 /* tmp1.z = RCP_e(|tmp1.z|) */
3924 if (ctx->bc->chip_class == CAYMAN) {
3925 for (i = 0; i < 3; i++) {
3926 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3927 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3928 alu.src[0].sel = ctx->temp_reg;
3929 alu.src[0].chan = 2;
3930 alu.src[0].abs = 1;
3931 alu.dst.sel = ctx->temp_reg;
3932 alu.dst.chan = i;
3933 if (i == 2)
3934 alu.dst.write = 1;
3935 if (i == 2)
3936 alu.last = 1;
3937 r = r600_bytecode_add_alu(ctx->bc, &alu);
3938 if (r)
3939 return r;
3940 }
3941 } else {
3942 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3943 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3944 alu.src[0].sel = ctx->temp_reg;
3945 alu.src[0].chan = 2;
3946 alu.src[0].abs = 1;
3947 alu.dst.sel = ctx->temp_reg;
3948 alu.dst.chan = 2;
3949 alu.dst.write = 1;
3950 alu.last = 1;
3951 r = r600_bytecode_add_alu(ctx->bc, &alu);
3952 if (r)
3953 return r;
3954 }
3955
3956 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3957 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3958 * muladd has no writemask, have to use another temp
3959 */
3960 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3961 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3962 alu.is_op3 = 1;
3963
3964 alu.src[0].sel = ctx->temp_reg;
3965 alu.src[0].chan = 0;
3966 alu.src[1].sel = ctx->temp_reg;
3967 alu.src[1].chan = 2;
3968
3969 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3970 alu.src[2].chan = 0;
3971 alu.src[2].value = *(uint32_t *)&one_point_five;
3972
3973 alu.dst.sel = ctx->temp_reg;
3974 alu.dst.chan = 0;
3975 alu.dst.write = 1;
3976
3977 r = r600_bytecode_add_alu(ctx->bc, &alu);
3978 if (r)
3979 return r;
3980
3981 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3982 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3983 alu.is_op3 = 1;
3984
3985 alu.src[0].sel = ctx->temp_reg;
3986 alu.src[0].chan = 1;
3987 alu.src[1].sel = ctx->temp_reg;
3988 alu.src[1].chan = 2;
3989
3990 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3991 alu.src[2].chan = 0;
3992 alu.src[2].value = *(uint32_t *)&one_point_five;
3993
3994 alu.dst.sel = ctx->temp_reg;
3995 alu.dst.chan = 1;
3996 alu.dst.write = 1;
3997
3998 alu.last = 1;
3999 r = r600_bytecode_add_alu(ctx->bc, &alu);
4000 if (r)
4001 return r;
4002 /* write initial W value into Z component */
4003 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4004 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4005 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4006 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4007 alu.dst.sel = ctx->temp_reg;
4008 alu.dst.chan = 2;
4009 alu.dst.write = 1;
4010 alu.last = 1;
4011 r = r600_bytecode_add_alu(ctx->bc, &alu);
4012 if (r)
4013 return r;
4014 }
4015
4016 /* for cube forms of lod and bias we need to route the lod
4017 value into Z */
4018 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
4019 inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
4020 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4021 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4022 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4023 alu.dst.sel = ctx->temp_reg;
4024 alu.dst.chan = 2;
4025 alu.last = 1;
4026 alu.dst.write = 1;
4027 r = r600_bytecode_add_alu(ctx->bc, &alu);
4028 if (r)
4029 return r;
4030 }
4031
4032 src_loaded = TRUE;
4033 src_gpr = ctx->temp_reg;
4034 }
4035
4036 if (src_requires_loading && !src_loaded) {
4037 for (i = 0; i < 4; i++) {
4038 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4040 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4041 alu.dst.sel = ctx->temp_reg;
4042 alu.dst.chan = i;
4043 if (i == 3)
4044 alu.last = 1;
4045 alu.dst.write = 1;
4046 r = r600_bytecode_add_alu(ctx->bc, &alu);
4047 if (r)
4048 return r;
4049 }
4050 src_loaded = TRUE;
4051 src_gpr = ctx->temp_reg;
4052 }
4053
4054 opcode = ctx->inst_info->r600_opcode;
4055 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4056 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4057 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4058 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4059 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4060 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4061 switch (opcode) {
4062 case SQ_TEX_INST_SAMPLE:
4063 opcode = SQ_TEX_INST_SAMPLE_C;
4064 break;
4065 case SQ_TEX_INST_SAMPLE_L:
4066 opcode = SQ_TEX_INST_SAMPLE_C_L;
4067 break;
4068 case SQ_TEX_INST_SAMPLE_LB:
4069 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4070 break;
4071 case SQ_TEX_INST_SAMPLE_G:
4072 opcode = SQ_TEX_INST_SAMPLE_C_G;
4073 break;
4074 }
4075 }
4076
4077 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4078 tex.inst = opcode;
4079
4080 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4081 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4082 tex.src_gpr = src_gpr;
4083 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4084 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4085 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4086 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4087 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4088
4089 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4090 tex.src_sel_x = 4;
4091 tex.src_sel_y = 4;
4092 tex.src_sel_z = 4;
4093 tex.src_sel_w = 4;
4094 } else if (src_loaded) {
4095 tex.src_sel_x = 0;
4096 tex.src_sel_y = 1;
4097 tex.src_sel_z = 2;
4098 tex.src_sel_w = 3;
4099 } else {
4100 tex.src_sel_x = ctx->src[0].swizzle[0];
4101 tex.src_sel_y = ctx->src[0].swizzle[1];
4102 tex.src_sel_z = ctx->src[0].swizzle[2];
4103 tex.src_sel_w = ctx->src[0].swizzle[3];
4104 tex.src_rel = ctx->src[0].rel;
4105 }
4106
4107 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
4108 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4109 tex.src_sel_x = 1;
4110 tex.src_sel_y = 0;
4111 tex.src_sel_z = 3;
4112 tex.src_sel_w = 2; /* route Z compare or Lod value into W */
4113 }
4114
4115 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4116 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4117 tex.coord_type_x = 1;
4118 tex.coord_type_y = 1;
4119 }
4120 tex.coord_type_z = 1;
4121 tex.coord_type_w = 1;
4122
4123 tex.offset_x = offset_x;
4124 tex.offset_y = offset_y;
4125 tex.offset_z = offset_z;
4126
4127 /* Put the depth for comparison in W.
4128 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4129 * Some instructions expect the depth in Z. */
4130 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4131 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4132 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4133 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4134 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4135 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4136 tex.src_sel_w = tex.src_sel_z;
4137 }
4138
4139 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4140 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4141 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4142 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4143 /* the array index is read from Y */
4144 tex.coord_type_y = 0;
4145 } else {
4146 /* the array index is read from Z */
4147 tex.coord_type_z = 0;
4148 tex.src_sel_z = tex.src_sel_y;
4149 }
4150 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4151 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4152 /* the array index is read from Z */
4153 tex.coord_type_z = 0;
4154
4155 r = r600_bytecode_add_tex(ctx->bc, &tex);
4156 if (r)
4157 return r;
4158
4159 /* add shadow ambient support - gallium doesn't do it yet */
4160 return 0;
4161 }
4162
tgsi_lrp(struct r600_shader_ctx * ctx)4163 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4164 {
4165 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4166 struct r600_bytecode_alu alu;
4167 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4168 unsigned i;
4169 int r;
4170
4171 /* optimize if it's just an equal balance */
4172 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4173 for (i = 0; i < lasti + 1; i++) {
4174 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4175 continue;
4176
4177 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4178 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4179 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4180 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4181 alu.omod = 3;
4182 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4183 alu.dst.chan = i;
4184 if (i == lasti) {
4185 alu.last = 1;
4186 }
4187 r = r600_bytecode_add_alu(ctx->bc, &alu);
4188 if (r)
4189 return r;
4190 }
4191 return 0;
4192 }
4193
4194 /* 1 - src0 */
4195 for (i = 0; i < lasti + 1; i++) {
4196 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4197 continue;
4198
4199 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4201 alu.src[0].sel = V_SQ_ALU_SRC_1;
4202 alu.src[0].chan = 0;
4203 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4204 r600_bytecode_src_toggle_neg(&alu.src[1]);
4205 alu.dst.sel = ctx->temp_reg;
4206 alu.dst.chan = i;
4207 if (i == lasti) {
4208 alu.last = 1;
4209 }
4210 alu.dst.write = 1;
4211 r = r600_bytecode_add_alu(ctx->bc, &alu);
4212 if (r)
4213 return r;
4214 }
4215
4216 /* (1 - src0) * src2 */
4217 for (i = 0; i < lasti + 1; i++) {
4218 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4219 continue;
4220
4221 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4222 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4223 alu.src[0].sel = ctx->temp_reg;
4224 alu.src[0].chan = i;
4225 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4226 alu.dst.sel = ctx->temp_reg;
4227 alu.dst.chan = i;
4228 if (i == lasti) {
4229 alu.last = 1;
4230 }
4231 alu.dst.write = 1;
4232 r = r600_bytecode_add_alu(ctx->bc, &alu);
4233 if (r)
4234 return r;
4235 }
4236
4237 /* src0 * src1 + (1 - src0) * src2 */
4238 for (i = 0; i < lasti + 1; i++) {
4239 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4240 continue;
4241
4242 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4243 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4244 alu.is_op3 = 1;
4245 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4246 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4247 alu.src[2].sel = ctx->temp_reg;
4248 alu.src[2].chan = i;
4249
4250 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4251 alu.dst.chan = i;
4252 if (i == lasti) {
4253 alu.last = 1;
4254 }
4255 r = r600_bytecode_add_alu(ctx->bc, &alu);
4256 if (r)
4257 return r;
4258 }
4259 return 0;
4260 }
4261
tgsi_cmp(struct r600_shader_ctx * ctx)4262 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4263 {
4264 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4265 struct r600_bytecode_alu alu;
4266 int i, r;
4267 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4268
4269 for (i = 0; i < lasti + 1; i++) {
4270 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4271 continue;
4272
4273 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4274 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4275 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4276 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4277 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4278 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4279 alu.dst.chan = i;
4280 alu.dst.write = 1;
4281 alu.is_op3 = 1;
4282 if (i == lasti)
4283 alu.last = 1;
4284 r = r600_bytecode_add_alu(ctx->bc, &alu);
4285 if (r)
4286 return r;
4287 }
4288 return 0;
4289 }
4290
tgsi_xpd(struct r600_shader_ctx * ctx)4291 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4292 {
4293 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4294 static const unsigned int src0_swizzle[] = {2, 0, 1};
4295 static const unsigned int src1_swizzle[] = {1, 2, 0};
4296 struct r600_bytecode_alu alu;
4297 uint32_t use_temp = 0;
4298 int i, r;
4299
4300 if (inst->Dst[0].Register.WriteMask != 0xf)
4301 use_temp = 1;
4302
4303 for (i = 0; i < 4; i++) {
4304 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4305 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4306 if (i < 3) {
4307 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4308 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4309 } else {
4310 alu.src[0].sel = V_SQ_ALU_SRC_0;
4311 alu.src[0].chan = i;
4312 alu.src[1].sel = V_SQ_ALU_SRC_0;
4313 alu.src[1].chan = i;
4314 }
4315
4316 alu.dst.sel = ctx->temp_reg;
4317 alu.dst.chan = i;
4318 alu.dst.write = 1;
4319
4320 if (i == 3)
4321 alu.last = 1;
4322 r = r600_bytecode_add_alu(ctx->bc, &alu);
4323 if (r)
4324 return r;
4325 }
4326
4327 for (i = 0; i < 4; i++) {
4328 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4330
4331 if (i < 3) {
4332 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4333 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4334 } else {
4335 alu.src[0].sel = V_SQ_ALU_SRC_0;
4336 alu.src[0].chan = i;
4337 alu.src[1].sel = V_SQ_ALU_SRC_0;
4338 alu.src[1].chan = i;
4339 }
4340
4341 alu.src[2].sel = ctx->temp_reg;
4342 alu.src[2].neg = 1;
4343 alu.src[2].chan = i;
4344
4345 if (use_temp)
4346 alu.dst.sel = ctx->temp_reg;
4347 else
4348 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4349 alu.dst.chan = i;
4350 alu.dst.write = 1;
4351 alu.is_op3 = 1;
4352 if (i == 3)
4353 alu.last = 1;
4354 r = r600_bytecode_add_alu(ctx->bc, &alu);
4355 if (r)
4356 return r;
4357 }
4358 if (use_temp)
4359 return tgsi_helper_copy(ctx, inst);
4360 return 0;
4361 }
4362
tgsi_exp(struct r600_shader_ctx * ctx)4363 static int tgsi_exp(struct r600_shader_ctx *ctx)
4364 {
4365 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4366 struct r600_bytecode_alu alu;
4367 int r;
4368 int i;
4369
4370 /* result.x = 2^floor(src); */
4371 if (inst->Dst[0].Register.WriteMask & 1) {
4372 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4373
4374 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4375 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4376
4377 alu.dst.sel = ctx->temp_reg;
4378 alu.dst.chan = 0;
4379 alu.dst.write = 1;
4380 alu.last = 1;
4381 r = r600_bytecode_add_alu(ctx->bc, &alu);
4382 if (r)
4383 return r;
4384
4385 if (ctx->bc->chip_class == CAYMAN) {
4386 for (i = 0; i < 3; i++) {
4387 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4388 alu.src[0].sel = ctx->temp_reg;
4389 alu.src[0].chan = 0;
4390
4391 alu.dst.sel = ctx->temp_reg;
4392 alu.dst.chan = i;
4393 alu.dst.write = i == 0;
4394 alu.last = i == 2;
4395 r = r600_bytecode_add_alu(ctx->bc, &alu);
4396 if (r)
4397 return r;
4398 }
4399 } else {
4400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4401 alu.src[0].sel = ctx->temp_reg;
4402 alu.src[0].chan = 0;
4403
4404 alu.dst.sel = ctx->temp_reg;
4405 alu.dst.chan = 0;
4406 alu.dst.write = 1;
4407 alu.last = 1;
4408 r = r600_bytecode_add_alu(ctx->bc, &alu);
4409 if (r)
4410 return r;
4411 }
4412 }
4413
4414 /* result.y = tmp - floor(tmp); */
4415 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4416 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4417
4418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4419 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4420
4421 alu.dst.sel = ctx->temp_reg;
4422 #if 0
4423 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4424 if (r)
4425 return r;
4426 #endif
4427 alu.dst.write = 1;
4428 alu.dst.chan = 1;
4429
4430 alu.last = 1;
4431
4432 r = r600_bytecode_add_alu(ctx->bc, &alu);
4433 if (r)
4434 return r;
4435 }
4436
4437 /* result.z = RoughApprox2ToX(tmp);*/
4438 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4439 if (ctx->bc->chip_class == CAYMAN) {
4440 for (i = 0; i < 3; i++) {
4441 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4442 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4443 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4444
4445 alu.dst.sel = ctx->temp_reg;
4446 alu.dst.chan = i;
4447 if (i == 2) {
4448 alu.dst.write = 1;
4449 alu.last = 1;
4450 }
4451
4452 r = r600_bytecode_add_alu(ctx->bc, &alu);
4453 if (r)
4454 return r;
4455 }
4456 } else {
4457 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4458 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4459 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4460
4461 alu.dst.sel = ctx->temp_reg;
4462 alu.dst.write = 1;
4463 alu.dst.chan = 2;
4464
4465 alu.last = 1;
4466
4467 r = r600_bytecode_add_alu(ctx->bc, &alu);
4468 if (r)
4469 return r;
4470 }
4471 }
4472
4473 /* result.w = 1.0;*/
4474 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4475 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4476
4477 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4478 alu.src[0].sel = V_SQ_ALU_SRC_1;
4479 alu.src[0].chan = 0;
4480
4481 alu.dst.sel = ctx->temp_reg;
4482 alu.dst.chan = 3;
4483 alu.dst.write = 1;
4484 alu.last = 1;
4485 r = r600_bytecode_add_alu(ctx->bc, &alu);
4486 if (r)
4487 return r;
4488 }
4489 return tgsi_helper_copy(ctx, inst);
4490 }
4491
tgsi_log(struct r600_shader_ctx * ctx)4492 static int tgsi_log(struct r600_shader_ctx *ctx)
4493 {
4494 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4495 struct r600_bytecode_alu alu;
4496 int r;
4497 int i;
4498
4499 /* result.x = floor(log2(|src|)); */
4500 if (inst->Dst[0].Register.WriteMask & 1) {
4501 if (ctx->bc->chip_class == CAYMAN) {
4502 for (i = 0; i < 3; i++) {
4503 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4504
4505 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4506 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4507 r600_bytecode_src_set_abs(&alu.src[0]);
4508
4509 alu.dst.sel = ctx->temp_reg;
4510 alu.dst.chan = i;
4511 if (i == 0)
4512 alu.dst.write = 1;
4513 if (i == 2)
4514 alu.last = 1;
4515 r = r600_bytecode_add_alu(ctx->bc, &alu);
4516 if (r)
4517 return r;
4518 }
4519
4520 } else {
4521 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4522
4523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4524 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4525 r600_bytecode_src_set_abs(&alu.src[0]);
4526
4527 alu.dst.sel = ctx->temp_reg;
4528 alu.dst.chan = 0;
4529 alu.dst.write = 1;
4530 alu.last = 1;
4531 r = r600_bytecode_add_alu(ctx->bc, &alu);
4532 if (r)
4533 return r;
4534 }
4535
4536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4537 alu.src[0].sel = ctx->temp_reg;
4538 alu.src[0].chan = 0;
4539
4540 alu.dst.sel = ctx->temp_reg;
4541 alu.dst.chan = 0;
4542 alu.dst.write = 1;
4543 alu.last = 1;
4544
4545 r = r600_bytecode_add_alu(ctx->bc, &alu);
4546 if (r)
4547 return r;
4548 }
4549
4550 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4551 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4552
4553 if (ctx->bc->chip_class == CAYMAN) {
4554 for (i = 0; i < 3; i++) {
4555 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4556
4557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4558 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4559 r600_bytecode_src_set_abs(&alu.src[0]);
4560
4561 alu.dst.sel = ctx->temp_reg;
4562 alu.dst.chan = i;
4563 if (i == 1)
4564 alu.dst.write = 1;
4565 if (i == 2)
4566 alu.last = 1;
4567
4568 r = r600_bytecode_add_alu(ctx->bc, &alu);
4569 if (r)
4570 return r;
4571 }
4572 } else {
4573 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4574
4575 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4576 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4577 r600_bytecode_src_set_abs(&alu.src[0]);
4578
4579 alu.dst.sel = ctx->temp_reg;
4580 alu.dst.chan = 1;
4581 alu.dst.write = 1;
4582 alu.last = 1;
4583
4584 r = r600_bytecode_add_alu(ctx->bc, &alu);
4585 if (r)
4586 return r;
4587 }
4588
4589 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4590
4591 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4592 alu.src[0].sel = ctx->temp_reg;
4593 alu.src[0].chan = 1;
4594
4595 alu.dst.sel = ctx->temp_reg;
4596 alu.dst.chan = 1;
4597 alu.dst.write = 1;
4598 alu.last = 1;
4599
4600 r = r600_bytecode_add_alu(ctx->bc, &alu);
4601 if (r)
4602 return r;
4603
4604 if (ctx->bc->chip_class == CAYMAN) {
4605 for (i = 0; i < 3; i++) {
4606 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4607 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4608 alu.src[0].sel = ctx->temp_reg;
4609 alu.src[0].chan = 1;
4610
4611 alu.dst.sel = ctx->temp_reg;
4612 alu.dst.chan = i;
4613 if (i == 1)
4614 alu.dst.write = 1;
4615 if (i == 2)
4616 alu.last = 1;
4617
4618 r = r600_bytecode_add_alu(ctx->bc, &alu);
4619 if (r)
4620 return r;
4621 }
4622 } else {
4623 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4625 alu.src[0].sel = ctx->temp_reg;
4626 alu.src[0].chan = 1;
4627
4628 alu.dst.sel = ctx->temp_reg;
4629 alu.dst.chan = 1;
4630 alu.dst.write = 1;
4631 alu.last = 1;
4632
4633 r = r600_bytecode_add_alu(ctx->bc, &alu);
4634 if (r)
4635 return r;
4636 }
4637
4638 if (ctx->bc->chip_class == CAYMAN) {
4639 for (i = 0; i < 3; i++) {
4640 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4642 alu.src[0].sel = ctx->temp_reg;
4643 alu.src[0].chan = 1;
4644
4645 alu.dst.sel = ctx->temp_reg;
4646 alu.dst.chan = i;
4647 if (i == 1)
4648 alu.dst.write = 1;
4649 if (i == 2)
4650 alu.last = 1;
4651
4652 r = r600_bytecode_add_alu(ctx->bc, &alu);
4653 if (r)
4654 return r;
4655 }
4656 } else {
4657 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4659 alu.src[0].sel = ctx->temp_reg;
4660 alu.src[0].chan = 1;
4661
4662 alu.dst.sel = ctx->temp_reg;
4663 alu.dst.chan = 1;
4664 alu.dst.write = 1;
4665 alu.last = 1;
4666
4667 r = r600_bytecode_add_alu(ctx->bc, &alu);
4668 if (r)
4669 return r;
4670 }
4671
4672 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4673
4674 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4675
4676 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4677 r600_bytecode_src_set_abs(&alu.src[0]);
4678
4679 alu.src[1].sel = ctx->temp_reg;
4680 alu.src[1].chan = 1;
4681
4682 alu.dst.sel = ctx->temp_reg;
4683 alu.dst.chan = 1;
4684 alu.dst.write = 1;
4685 alu.last = 1;
4686
4687 r = r600_bytecode_add_alu(ctx->bc, &alu);
4688 if (r)
4689 return r;
4690 }
4691
4692 /* result.z = log2(|src|);*/
4693 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4694 if (ctx->bc->chip_class == CAYMAN) {
4695 for (i = 0; i < 3; i++) {
4696 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4697
4698 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4699 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4700 r600_bytecode_src_set_abs(&alu.src[0]);
4701
4702 alu.dst.sel = ctx->temp_reg;
4703 if (i == 2)
4704 alu.dst.write = 1;
4705 alu.dst.chan = i;
4706 if (i == 2)
4707 alu.last = 1;
4708
4709 r = r600_bytecode_add_alu(ctx->bc, &alu);
4710 if (r)
4711 return r;
4712 }
4713 } else {
4714 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4715
4716 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4717 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4718 r600_bytecode_src_set_abs(&alu.src[0]);
4719
4720 alu.dst.sel = ctx->temp_reg;
4721 alu.dst.write = 1;
4722 alu.dst.chan = 2;
4723 alu.last = 1;
4724
4725 r = r600_bytecode_add_alu(ctx->bc, &alu);
4726 if (r)
4727 return r;
4728 }
4729 }
4730
4731 /* result.w = 1.0; */
4732 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4733 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4734
4735 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4736 alu.src[0].sel = V_SQ_ALU_SRC_1;
4737 alu.src[0].chan = 0;
4738
4739 alu.dst.sel = ctx->temp_reg;
4740 alu.dst.chan = 3;
4741 alu.dst.write = 1;
4742 alu.last = 1;
4743
4744 r = r600_bytecode_add_alu(ctx->bc, &alu);
4745 if (r)
4746 return r;
4747 }
4748
4749 return tgsi_helper_copy(ctx, inst);
4750 }
4751
tgsi_eg_arl(struct r600_shader_ctx * ctx)4752 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4753 {
4754 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4755 struct r600_bytecode_alu alu;
4756 int r;
4757
4758 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4759
4760 switch (inst->Instruction.Opcode) {
4761 case TGSI_OPCODE_ARL:
4762 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4763 break;
4764 case TGSI_OPCODE_ARR:
4765 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4766 break;
4767 case TGSI_OPCODE_UARL:
4768 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4769 break;
4770 default:
4771 assert(0);
4772 return -1;
4773 }
4774
4775 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4776 alu.last = 1;
4777 alu.dst.sel = ctx->bc->ar_reg;
4778 alu.dst.write = 1;
4779 r = r600_bytecode_add_alu(ctx->bc, &alu);
4780 if (r)
4781 return r;
4782
4783 ctx->bc->ar_loaded = 0;
4784 return 0;
4785 }
tgsi_r600_arl(struct r600_shader_ctx * ctx)4786 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4787 {
4788 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4789 struct r600_bytecode_alu alu;
4790 int r;
4791
4792 switch (inst->Instruction.Opcode) {
4793 case TGSI_OPCODE_ARL:
4794 memset(&alu, 0, sizeof(alu));
4795 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4796 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4797 alu.dst.sel = ctx->bc->ar_reg;
4798 alu.dst.write = 1;
4799 alu.last = 1;
4800
4801 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4802 return r;
4803
4804 memset(&alu, 0, sizeof(alu));
4805 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4806 alu.src[0].sel = ctx->bc->ar_reg;
4807 alu.dst.sel = ctx->bc->ar_reg;
4808 alu.dst.write = 1;
4809 alu.last = 1;
4810
4811 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4812 return r;
4813 break;
4814 case TGSI_OPCODE_ARR:
4815 memset(&alu, 0, sizeof(alu));
4816 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4817 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4818 alu.dst.sel = ctx->bc->ar_reg;
4819 alu.dst.write = 1;
4820 alu.last = 1;
4821
4822 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4823 return r;
4824 break;
4825 case TGSI_OPCODE_UARL:
4826 memset(&alu, 0, sizeof(alu));
4827 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4828 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4829 alu.dst.sel = ctx->bc->ar_reg;
4830 alu.dst.write = 1;
4831 alu.last = 1;
4832
4833 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4834 return r;
4835 break;
4836 default:
4837 assert(0);
4838 return -1;
4839 }
4840
4841 ctx->bc->ar_loaded = 0;
4842 return 0;
4843 }
4844
tgsi_opdst(struct r600_shader_ctx * ctx)4845 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4846 {
4847 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4848 struct r600_bytecode_alu alu;
4849 int i, r = 0;
4850
4851 for (i = 0; i < 4; i++) {
4852 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4853
4854 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4855 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4856
4857 if (i == 0 || i == 3) {
4858 alu.src[0].sel = V_SQ_ALU_SRC_1;
4859 } else {
4860 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4861 }
4862
4863 if (i == 0 || i == 2) {
4864 alu.src[1].sel = V_SQ_ALU_SRC_1;
4865 } else {
4866 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4867 }
4868 if (i == 3)
4869 alu.last = 1;
4870 r = r600_bytecode_add_alu(ctx->bc, &alu);
4871 if (r)
4872 return r;
4873 }
4874 return 0;
4875 }
4876
emit_logic_pred(struct r600_shader_ctx * ctx,int opcode)4877 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4878 {
4879 struct r600_bytecode_alu alu;
4880 int r;
4881
4882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4883 alu.inst = opcode;
4884 alu.execute_mask = 1;
4885 alu.update_pred = 1;
4886
4887 alu.dst.sel = ctx->temp_reg;
4888 alu.dst.write = 1;
4889 alu.dst.chan = 0;
4890
4891 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4892 alu.src[1].sel = V_SQ_ALU_SRC_0;
4893 alu.src[1].chan = 0;
4894
4895 alu.last = 1;
4896
4897 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4898 if (r)
4899 return r;
4900 return 0;
4901 }
4902
pops(struct r600_shader_ctx * ctx,int pops)4903 static int pops(struct r600_shader_ctx *ctx, int pops)
4904 {
4905 unsigned force_pop = ctx->bc->force_add_cf;
4906
4907 if (!force_pop) {
4908 int alu_pop = 3;
4909 if (ctx->bc->cf_last) {
4910 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4911 alu_pop = 0;
4912 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4913 alu_pop = 1;
4914 }
4915 alu_pop += pops;
4916 if (alu_pop == 1) {
4917 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4918 ctx->bc->force_add_cf = 1;
4919 } else if (alu_pop == 2) {
4920 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4921 ctx->bc->force_add_cf = 1;
4922 } else {
4923 force_pop = 1;
4924 }
4925 }
4926
4927 if (force_pop) {
4928 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4929 ctx->bc->cf_last->pop_count = pops;
4930 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4931 }
4932
4933 return 0;
4934 }
4935
callstack_decrease_current(struct r600_shader_ctx * ctx,unsigned reason)4936 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4937 {
4938 switch(reason) {
4939 case FC_PUSH_VPM:
4940 ctx->bc->callstack[ctx->bc->call_sp].current--;
4941 break;
4942 case FC_PUSH_WQM:
4943 case FC_LOOP:
4944 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4945 break;
4946 case FC_REP:
4947 /* TOODO : for 16 vp asic should -= 2; */
4948 ctx->bc->callstack[ctx->bc->call_sp].current --;
4949 break;
4950 }
4951 }
4952
callstack_check_depth(struct r600_shader_ctx * ctx,unsigned reason,unsigned check_max_only)4953 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4954 {
4955 if (check_max_only) {
4956 int diff;
4957 switch (reason) {
4958 case FC_PUSH_VPM:
4959 diff = 1;
4960 break;
4961 case FC_PUSH_WQM:
4962 diff = 4;
4963 break;
4964 default:
4965 assert(0);
4966 diff = 0;
4967 }
4968 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4969 ctx->bc->callstack[ctx->bc->call_sp].max) {
4970 ctx->bc->callstack[ctx->bc->call_sp].max =
4971 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4972 }
4973 return;
4974 }
4975 switch (reason) {
4976 case FC_PUSH_VPM:
4977 ctx->bc->callstack[ctx->bc->call_sp].current++;
4978 break;
4979 case FC_PUSH_WQM:
4980 case FC_LOOP:
4981 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4982 break;
4983 case FC_REP:
4984 ctx->bc->callstack[ctx->bc->call_sp].current++;
4985 break;
4986 }
4987
4988 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4989 ctx->bc->callstack[ctx->bc->call_sp].max) {
4990 ctx->bc->callstack[ctx->bc->call_sp].max =
4991 ctx->bc->callstack[ctx->bc->call_sp].current;
4992 }
4993 }
4994
fc_set_mid(struct r600_shader_ctx * ctx,int fc_sp)4995 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4996 {
4997 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4998
4999 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
5000 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
5001 sp->mid[sp->num_mid] = ctx->bc->cf_last;
5002 sp->num_mid++;
5003 }
5004
fc_pushlevel(struct r600_shader_ctx * ctx,int type)5005 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
5006 {
5007 ctx->bc->fc_sp++;
5008 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
5009 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
5010 }
5011
fc_poplevel(struct r600_shader_ctx * ctx)5012 static void fc_poplevel(struct r600_shader_ctx *ctx)
5013 {
5014 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5015 if (sp->mid) {
5016 free(sp->mid);
5017 sp->mid = NULL;
5018 }
5019 sp->num_mid = 0;
5020 sp->start = NULL;
5021 sp->type = 0;
5022 ctx->bc->fc_sp--;
5023 }
5024
/*
 * Disabled (never compiled) scaffolding for RETURN handling and flag-based
 * loop exits.  emit_setret_in_loop_flag() and emit_testflag() are empty
 * stubs, and emit_jump_to_offset() does not compute its target yet.
 */
#if 0
/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP CF instruction popping @pops entries; @offset is unused so far. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Sketch of "return if the flag is set" built from the helpers above. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Sketch of "break out of the loop at @fc_sp if the flag is set". */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
5072
tgsi_if(struct r600_shader_ctx * ctx)5073 static int tgsi_if(struct r600_shader_ctx *ctx)
5074 {
5075 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5076
5077 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5078
5079 fc_pushlevel(ctx, FC_IF);
5080
5081 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5082 return 0;
5083 }
5084
tgsi_else(struct r600_shader_ctx * ctx)5085 static int tgsi_else(struct r600_shader_ctx *ctx)
5086 {
5087 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5088 ctx->bc->cf_last->pop_count = 1;
5089
5090 fc_set_mid(ctx, ctx->bc->fc_sp);
5091 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5092 return 0;
5093 }
5094
tgsi_endif(struct r600_shader_ctx * ctx)5095 static int tgsi_endif(struct r600_shader_ctx *ctx)
5096 {
5097 pops(ctx, 1);
5098 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5099 R600_ERR("if/endif unbalanced in shader\n");
5100 return -1;
5101 }
5102
5103 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5104 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5105 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5106 } else {
5107 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5108 }
5109 fc_poplevel(ctx);
5110
5111 callstack_decrease_current(ctx, FC_PUSH_VPM);
5112 return 0;
5113 }
5114
tgsi_bgnloop(struct r600_shader_ctx * ctx)5115 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5116 {
5117 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
5118 * limited to 4096 iterations, like the other LOOP_* instructions. */
5119 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
5120
5121 fc_pushlevel(ctx, FC_LOOP);
5122
5123 /* check stack depth */
5124 callstack_check_depth(ctx, FC_LOOP, 0);
5125 return 0;
5126 }
5127
tgsi_endloop(struct r600_shader_ctx * ctx)5128 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5129 {
5130 int i;
5131
5132 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5133
5134 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5135 R600_ERR("loop/endloop in shader code are not paired.\n");
5136 return -EINVAL;
5137 }
5138
5139 /* fixup loop pointers - from r600isa
5140 LOOP END points to CF after LOOP START,
5141 LOOP START point to CF after LOOP END
5142 BRK/CONT point to LOOP END CF
5143 */
5144 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5145
5146 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5147
5148 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5149 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5150 }
5151 /* XXX add LOOPRET support */
5152 fc_poplevel(ctx);
5153 callstack_decrease_current(ctx, FC_LOOP);
5154 return 0;
5155 }
5156
tgsi_loop_brk_cont(struct r600_shader_ctx * ctx)5157 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5158 {
5159 unsigned int fscp;
5160
5161 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5162 {
5163 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5164 break;
5165 }
5166
5167 if (fscp == 0) {
5168 R600_ERR("Break not inside loop/endloop pair\n");
5169 return -EINVAL;
5170 }
5171
5172 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5173
5174 fc_set_mid(ctx, fscp);
5175
5176 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5177 return 0;
5178 }
5179
tgsi_umad(struct r600_shader_ctx * ctx)5180 static int tgsi_umad(struct r600_shader_ctx *ctx)
5181 {
5182 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5183 struct r600_bytecode_alu alu;
5184 int i, j, r;
5185 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5186
5187 /* src0 * src1 */
5188 for (i = 0; i < lasti + 1; i++) {
5189 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5190 continue;
5191
5192 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5193
5194 alu.dst.chan = i;
5195 alu.dst.sel = ctx->temp_reg;
5196 alu.dst.write = 1;
5197
5198 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5199 for (j = 0; j < 2; j++) {
5200 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5201 }
5202
5203 alu.last = 1;
5204 r = r600_bytecode_add_alu(ctx->bc, &alu);
5205 if (r)
5206 return r;
5207 }
5208
5209
5210 for (i = 0; i < lasti + 1; i++) {
5211 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5212 continue;
5213
5214 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5215 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5216
5217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5218
5219 alu.src[0].sel = ctx->temp_reg;
5220 alu.src[0].chan = i;
5221
5222 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5223 if (i == lasti) {
5224 alu.last = 1;
5225 }
5226 r = r600_bytecode_add_alu(ctx->bc, &alu);
5227 if (r)
5228 return r;
5229 }
5230 return 0;
5231 }
5232
/*
 * TGSI opcode table built from the plain V_SQ_* (non-EG_) hardware opcodes.
 * Presumably the table used for R600/R700-class chips -- confirm at the
 * site that selects between this table and the eg_/cm_ ones.
 *
 * Each entry is { opcode, flag, hardware opcode, handler }:
 *   - opcode:   a TGSI_OPCODE_* value, or a raw number for an opcode value
 *               marked as a "gap".
 *   - flag:     1 only for MAD, the one entry here that uses an OP3
 *               opcode; 0 everywhere else (presumably an "is three-operand"
 *               marker -- confirm against the struct definition).
 *   - hardware opcode: ALU, CF or SQ_TEX_INST_* value.  Handlers can read
 *               it as ctx->inst_info->r600_opcode (tgsi_loop_brk_cont does).
 *               NOP or 0 is used where the handler does not appear to need
 *               a single opcode, and for unsupported entries.
 *   - handler:  function that emits the bytecode; tgsi_unsupported marks
 *               opcodes without an implementation.
 *
 * Entries are listed in ascending opcode order with explicit placeholders
 * for the gaps, which suggests the array is indexed directly by TGSI
 * opcode.  Do not reorder or drop entries -- TODO confirm at the lookup
 * site.
 */
5233 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5234 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5235 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5236 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5237
5238 /* XXX:
5239 * For state trackers other than OpenGL, we'll want to use
5240 * _RECIP_IEEE instead.
5241 */
5242 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5243
5244 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5245 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5246 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5247 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5248 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5249 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5250 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5251 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5252 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5253 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
/* "less than" style compares (SLT, SLE, ISLT, USLT) reuse the SETGT/SETGE
 * opcodes with tgsi_op2_swap -- presumably it swaps the two sources;
 * confirm in tgsi_op2_swap. */
5254 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5255 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5256 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
/* SUB shares the ADD opcode; NOTE(review): the negation presumably happens
 * inside tgsi_op2 -- confirm. */
5257 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5258 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5259 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5260 /* gap */
5261 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5262 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5263 /* gap */
5264 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5265 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5266 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5267 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5268 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5269 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5270 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5271 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5272 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5273 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5274 /* gap */
5275 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* ABS shares the MOV opcode; NOTE(review): the absolute-value modifier is
 * presumably applied inside tgsi_op2 -- confirm. */
5276 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5277 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5278 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5279 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5280 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5281 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5282 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5283 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5284 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5285 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5286 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5287 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5288 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5289 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5290 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5291 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5292 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5293 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5294 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5295 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5296 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5297 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5298 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5299 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5300 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5301 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5302 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5303 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5305 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5307 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5308 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5309 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5310 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5311 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5312 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5313 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5314 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5315 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
/* BRK (and CONT further down) carry a CF opcode rather than an ALU one;
 * tgsi_loop_brk_cont passes it to r600_bytecode_add_cfinst(). */
5316 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5317 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5318 /* gap */
5319 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5320 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5321 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5322 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5323 /* gap */
5324 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5325 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5326 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5327 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5328 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5329 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5330 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5331 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5332 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5333 /* gap */
5334 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5335 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5336 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5337 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5338 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5339 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5340 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5341 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5342 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5343 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5344 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5345 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5346 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5347 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5348 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5349 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5350 /* gap */
5351 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5352 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5353 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* NOTE(review): TGSI NOP is routed to tgsi_unsupported -- confirm this is
 * intended. */
5354 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5355 /* gap */
5356 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5357 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5358 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5359 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5360 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5361 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5362 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5363 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5364 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5365 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5366 /* gap */
5367 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5368 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5369 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5370 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5371 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5372 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5373 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5374 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5375 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5376 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5377 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5378 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5379 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5380 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5381 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5382 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5383 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5384 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5385 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5386 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5387 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5388 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
/* NOTE(review): USNE uses tgsi_op2_swap here but tgsi_op2 in
 * eg_shader_tgsi_instruction; "not equal" looks order-independent, so
 * this is presumably harmless -- confirm. */
5389 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5390 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5391 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5392 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5393 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5394 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5395 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5396 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5397 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5398 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5399 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5400 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5401 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5402 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5403 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5404 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5405 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5406 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5407 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5408 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5409 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
/* final entry: TGSI_OPCODE_LAST, routed to tgsi_unsupported */
5410 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5411 };
5412
/*
 * TGSI opcode table built from the EG_V_SQ_* hardware opcodes.  Presumably
 * the Evergreen-class counterpart of r600_shader_tgsi_instruction --
 * confirm at the site that selects between the tables.
 *
 * Entry layout matches the r600 table: { opcode, flag, hardware opcode,
 * handler }.  The flag is 1 only for MAD (OP3 opcode), and tgsi_unsupported
 * marks opcodes without an implementation.  Entries are listed in ascending
 * opcode order with numeric placeholders for the gaps, which suggests
 * direct indexing by TGSI opcode.  Do not reorder or drop entries -- TODO
 * confirm at the lookup site.
 *
 * Differences from the r600 table that are visible in the entries:
 *   - ARL/ARR/UARL use tgsi_eg_arl instead of tgsi_r600_arl.
 *   - RCP uses RECIP_IEEE (r600: RECIP_CLAMPED); RSQ carries
 *     RECIPSQRT_IEEE (r600: NOP).
 *   - SHL/ISHR/USHR use tgsi_op2 (r600: tgsi_op2_trans).
 *   - F2I/F2U use tgsi_f2i (r600: tgsi_op2_trans).
 *   - USNE uses tgsi_op2 (r600: tgsi_op2_swap).
 */
5413 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5414 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5415 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5416 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5417 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5418 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5419 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5420 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5421 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5422 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5423 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5424 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5425 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5426 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5427 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
/* "less than" style compares (SLT, SLE, ISLT, USLT) reuse the SETGT/SETGE
 * opcodes with tgsi_op2_swap -- presumably it swaps the two sources;
 * confirm in tgsi_op2_swap. */
5428 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5429 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5430 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5431 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5432 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5433 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5434 /* gap */
5435 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5436 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5437 /* gap */
5438 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5439 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5440 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5441 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5442 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5443 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5444 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5445 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5446 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5447 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5448 /* gap */
5449 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5450 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5451 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5452 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5453 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5454 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5455 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5456 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5457 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5458 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5460 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5461 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5462 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5463 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5464 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5465 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5466 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5467 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5468 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5469 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5470 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5471 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5472 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5473 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5474 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5475 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5476 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5477 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5479 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5481 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5483 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5484 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5485 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5486 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5487 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5488 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5489 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
/* BRK (and CONT further down) carry a CF opcode rather than an ALU one;
 * tgsi_loop_brk_cont passes it to r600_bytecode_add_cfinst(). */
5490 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5491 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5492 /* gap */
5493 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5494 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5495 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5496 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5497 /* gap */
5498 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5499 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5500 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5501 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5502 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5503 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5504 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5505 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5506 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5507 /* gap */
5508 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5509 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5510 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5511 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5512 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5513 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5514 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5515 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5516 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5517 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5518 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5519 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5520 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5522 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5523 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5524 /* gap */
5525 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5526 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5527 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* NOTE(review): TGSI NOP is routed to tgsi_unsupported -- confirm this is
 * intended. */
5528 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5529 /* gap */
5530 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5531 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5532 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5533 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5534 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5535 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5536 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5537 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5538 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5539 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5540 /* gap */
5541 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5542 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5543 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5544 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5545 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5546 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5547 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5548 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5549 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5550 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5551 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5552 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5553 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5554 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5555 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5556 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5557 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5558 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5559 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5560 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5561 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5562 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5563 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5564 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5565 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5566 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5567 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5568 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5569 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5570 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5571 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5572 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5573 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5574 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5575 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5576 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5577 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5578 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5579 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5580 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5581 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5582 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5583 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
/* final entry: TGSI_OPCODE_LAST, routed to tgsi_unsupported */
5584 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5585 };
5586
5587 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5588 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5589 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5590 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5591 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5592 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5593 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5594 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5595 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5596 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5597 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5598 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5599 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5600 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5601 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5602 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5603 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5604 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5605 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5606 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5607 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5608 /* gap */
5609 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5610 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5611 /* gap */
5612 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5613 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5614 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5615 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5616 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5617 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5618 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5619 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5620 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5621 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5622 /* gap */
5623 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5624 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5625 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5626 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5627 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5628 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5629 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5630 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5631 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5632 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5634 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5635 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5636 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5637 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5638 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5639 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5640 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5641 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5642 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5643 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5644 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5645 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5646 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5647 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5648 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5649 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5650 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5651 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5652 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5653 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5655 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5656 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5657 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5658 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5659 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5660 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5661 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5662 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5663 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5664 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5665 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5666 /* gap */
5667 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5668 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5669 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5670 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5671 /* gap */
5672 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5673 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5674 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5675 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5676 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5677 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5678 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5679 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5680 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5681 /* gap */
5682 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5683 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5684 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5685 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5686 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5687 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5688 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5689 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5690 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5691 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5692 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5693 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5694 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5695 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5696 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5697 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5698 /* gap */
5699 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5700 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5701 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5702 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5703 /* gap */
5704 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5705 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5706 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5707 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5708 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5709 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5710 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5711 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5712 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5713 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5714 /* gap */
5715 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5716 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5717 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5718 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5719 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5720 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5721 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5722 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5723 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5724 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5725 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5726 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5727 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5728 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5729 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5730 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5731 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5732 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5733 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5734 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5735 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5736 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5737 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5738 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5739 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5740 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5741 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5742 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5743 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5744 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5745 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5746 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5747 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5748 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5749 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5750 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5751 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5752 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5753 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5754 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5755 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5756 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5757 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5758 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5759 };
5760