1 /**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include <d3d11shader.h>
28 #include "d3d1xstutil.h"
29 #include "sm4.h"
30 #include "tgsi/tgsi_ureg.h"
31 #include <vector>
32
/* Validation helpers used throughout the translator.
 *
 * Active variant: both helpers map onto assert().
 * Disabled variant: a failed check throws a C string, which is the
 * exception type translate() catches to return a null result.
 */
#if 1
#define check(x) assert(x)
#define fail(x) assert(0 && (x))
#else
#define check(x) do {if(!(x)) throw(#x);} while(0)
#define fail(x) throw(x)
#endif
40
/* One row of the SM4 -> TGSI interpolation table: a TGSI interpolation
 * mode plus the centroid flag handed to the fragment-input declaration. */
struct tgsi_interpolation
{
	unsigned interpolation; /* TGSI_INTERPOLATE_* value */
	bool centroid;          /* centroid argument used when declaring the input */
};
46
47 static tgsi_interpolation sm4_to_pipe_interpolation[] =
48 {
49 {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */
50 {TGSI_INTERPOLATE_CONSTANT, false},
51 {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */
52 {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */
53 {TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */
54 {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */
55
56 // Added in D3D10.1
57 {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */
58 {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */
59 };
60
61 static int sm4_to_pipe_sv[] =
62 {
63 -1,
64 TGSI_SEMANTIC_POSITION,
65 -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
66 -1, /*TGSI_SEMANTIC_CULL_DISTANCE */
67 -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
68 -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
69 -1, /*TGSI_SEMANTIC_VERTEXID,*/
70 TGSI_SEMANTIC_PRIMID,
71 TGSI_SEMANTIC_INSTANCEID,
72 TGSI_SEMANTIC_FACE,
73 -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
74 };
75
76 struct sm4_to_tgsi_converter
77 {
78 struct ureg_program* ureg;
79 std::vector<struct ureg_dst> temps;
80 std::vector<struct ureg_dst> outputs;
81 std::vector<struct ureg_src> inputs;
82 std::vector<struct ureg_src> resources;
83 std::vector<struct ureg_src> samplers;
84 std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
85 std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
86 std::vector<std::pair<unsigned, unsigned> > loops;
87 sm4_insn* insn;
88 struct sm4_program& program;
89 std::vector<unsigned> sm4_to_tgsi_insn_num;
90 std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
91 bool in_sub;
92 bool avoid_txf;
93 bool avoid_int;
94
sm4_to_tgsi_convertersm4_to_tgsi_converter95 sm4_to_tgsi_converter(struct sm4_program& program)
96 : program(program)
97 {
98 avoid_txf = true;
99 avoid_int = false;
100 }
101
_regsm4_to_tgsi_converter102 struct ureg_dst _reg(sm4_op& op)
103 {
104 switch(op.file)
105 {
106 case SM4_FILE_NULL:
107 {
108 struct ureg_dst d;
109 memset(&d, 0, sizeof(d));
110 d.File = TGSI_FILE_NULL;
111 return d;
112 }
113 case SM4_FILE_TEMP:
114 check(op.has_simple_index());
115 check(op.indices[0].disp < temps.size());
116 return temps[op.indices[0].disp];
117 case SM4_FILE_OUTPUT:
118 check(op.has_simple_index());
119 check(op.indices[0].disp < outputs.size());
120 return outputs[op.indices[0].disp];
121 default:
122 check(0);
123 return ureg_dst_undef();
124 }
125 }
126
_dstsm4_to_tgsi_converter127 struct ureg_dst _dst(unsigned i = 0)
128 {
129 check(i < insn->num_ops);
130 sm4_op& op = *insn->ops[i];
131 check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
132 struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
133 if(insn->insn.sat)
134 d = ureg_saturate(d);
135 return d;
136 }
137
_srcsm4_to_tgsi_converter138 struct ureg_src _src(unsigned i)
139 {
140 check(i < insn->num_ops);
141 sm4_op& op = *insn->ops[i];
142 struct ureg_src s;
143 switch(op.file)
144 {
145 case SM4_FILE_IMMEDIATE32:
146 s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
147 break;
148 case SM4_FILE_INPUT:
149 check(op.is_index_simple(0));
150 check(op.num_indices == 1 || op.num_indices == 2);
151 // TODO: is this correct, or are incorrectly swapping the two indices in the GS case?
152 check(op.indices[op.num_indices - 1].disp < inputs.size());
153 s = inputs[op.indices[op.num_indices - 1].disp];
154 if(op.num_indices == 2)
155 {
156 s.Dimension = 1;
157 s.DimensionIndex = op.indices[0].disp;
158 }
159 break;
160 case SM4_FILE_CONSTANT_BUFFER:
161 // TODO: indirect addressing
162 check(op.num_indices == 2);
163 check(op.is_index_simple(0));
164 check(op.is_index_simple(1));
165 s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
166 s.Dimension = 1;
167 s.DimensionIndex = op.indices[0].disp;
168 break;
169 default:
170 s = ureg_src(_reg(op));
171 break;
172 }
173 if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
174 s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
175 else
176 {
177 /* immediates are masked to show needed values */
178 check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
179 }
180 if(op.abs)
181 s = ureg_abs(s);
182 if(op.neg)
183 s = ureg_negate(s);
184 return s;
185 };
186
_idxsm4_to_tgsi_converter187 int _idx(sm4_file file, unsigned i = 0)
188 {
189 check(i < insn->num_ops);
190 sm4_op& op = *insn->ops[i];
191 check(op.file == file);
192 check(op.has_simple_index());
193 return (int)op.indices[0].disp;
194 }
195
tex_targetsm4_to_tgsi_converter196 unsigned tex_target(unsigned resource, unsigned sampler)
197 {
198 unsigned shadow = sampler_modes[sampler];
199 unsigned target = shadow ? targets[resource].second : targets[resource].first;
200 check(target);
201 return target;
202 }
203
res_return_typesm4_to_tgsi_converter204 enum pipe_type res_return_type(unsigned type)
205 {
206 switch(type)
207 {
208 case D3D_RETURN_TYPE_UNORM: return PIPE_TYPE_UNORM;
209 case D3D_RETURN_TYPE_SNORM: return PIPE_TYPE_SNORM;
210 case D3D_RETURN_TYPE_SINT: return PIPE_TYPE_SINT;
211 case D3D_RETURN_TYPE_UINT: return PIPE_TYPE_UINT;
212 case D3D_RETURN_TYPE_FLOAT: return PIPE_TYPE_FLOAT;
213 default:
214 fail("invalid resource return type");
215 return PIPE_TYPE_FLOAT;
216 }
217 }
218
219 std::vector<struct ureg_dst> insn_tmps;
220
_tmpsm4_to_tgsi_converter221 struct ureg_dst _tmp()
222 {
223 struct ureg_dst t = ureg_DECL_temporary(ureg);
224 insn_tmps.push_back(t);
225 return t;
226 }
227
_tmpsm4_to_tgsi_converter228 struct ureg_dst _tmp(struct ureg_dst d)
229 {
230 if(d.File == TGSI_FILE_TEMPORARY)
231 return d;
232 else
233 return ureg_writemask(_tmp(), d.WriteMask);
234 }
235
/* Case-generating helpers for translate_insns().
 *
 * OPn_(d, g): `case SM4_OPCODE_d:` that emits ureg_g with the destination
 *             from operand 0 and n sources from operands 1..n.
 * OPn(n):     same, when the SM4 and ureg names coincide.
 * OP_CF(d,g): control-flow case; emits ureg_g with a label and records that
 *             the label must later be fixed up to the SM4 instruction
 *             linked to the current one.
 *
 * None of the macros ends in a semicolon; the use site supplies it.
 * They expect `ureg`, `label`, `insn_num`, `program` and
 * `label_to_sm4_insn_num` to be in scope.
 */
#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
#define OP1(n) OP1_(n, n)
#define OP2(n) OP2_(n, n)
#define OP3(n) OP3_(n, n)
#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break
243
translate_insnssm4_to_tgsi_converter244 void translate_insns(unsigned begin, unsigned end)
245 {
246 for(unsigned insn_num = begin; insn_num < end; ++insn_num)
247 {
248 sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
249 unsigned label;
250 insn = program.insns[insn_num];
251 bool ok;
252 ok = true;
253 switch(insn->opcode)
254 {
255 // trivial instructions
256 case SM4_OPCODE_NOP:
257 break;
258 OP1(MOV);
259
260 // float
261 OP2(ADD);
262 OP2(MUL);
263 OP3(MAD);
264 OP2(DIV);
265 OP1(FRC);
266 OP1(RCP);
267 OP2(MIN);
268 OP2(MAX);
269 OP2_(LT, SLT);
270 OP2_(GE, SGE);
271 OP2_(EQ, SEQ);
272 OP2_(NE, SNE);
273
274 // bitwise
275 OP1(NOT);
276 OP2(AND);
277 OP2(OR);
278 OP2(XOR);
279
280 // special mathematical
281 OP2(DP2);
282 OP2(DP3);
283 OP2(DP4);
284 OP1(RSQ);
285 OP1_(LOG, LG2);
286 OP1_(EXP, EX2);
287
288 // rounding
289 OP1_(ROUND_NE, ROUND);
290 OP1_(ROUND_Z, TRUNC);
291 OP1_(ROUND_PI, CEIL);
292 OP1_(ROUND_NI, FLR);
293
294 // cross-thread
295 OP1_(DERIV_RTX, DDX);
296 OP1_(DERIV_RTX_COARSE, DDX);
297 OP1_(DERIV_RTX_FINE, DDX);
298 OP1_(DERIV_RTY, DDY);
299 OP1_(DERIV_RTY_COARSE, DDY);
300 OP1_(DERIV_RTY_FINE, DDY);
301 case SM4_OPCODE_EMIT:
302 ureg_EMIT(ureg);
303 break;
304 case SM4_OPCODE_CUT:
305 ureg_ENDPRIM(ureg);
306 break;
307 case SM4_OPCODE_EMITTHENCUT:
308 ureg_EMIT(ureg);
309 ureg_ENDPRIM(ureg);
310 break;
311
312 // non-trivial instructions
313 case SM4_OPCODE_MOVC:
314 /* CMP checks for < 0, but MOVC checks for != 0
315 * but fortunately, x != 0 is equivalent to -abs(x) < 0
316 * XXX: can test_nz apply to this?!
317 */
318 ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
319 break;
320 case SM4_OPCODE_SQRT:
321 {
322 struct ureg_dst d = _dst();
323 struct ureg_dst t = _tmp(d);
324 ureg_RSQ(ureg, t, _src(1));
325 ureg_RCP(ureg, d, ureg_src(t));
326 break;
327 }
328 case SM4_OPCODE_SINCOS:
329 {
330 struct ureg_dst s = _dst(0);
331 struct ureg_dst c = _dst(1);
332 struct ureg_src v = _src(2);
333 if(s.File != TGSI_FILE_NULL)
334 ureg_SIN(ureg, s, v);
335 if(c.File != TGSI_FILE_NULL)
336 ureg_COS(ureg, c, v);
337 break;
338 }
339
340 // control flow
341 case SM4_OPCODE_DISCARD:
342 ureg_KIL(ureg, _src(0));
343 break;
344 OP_CF(LOOP, BGNLOOP);
345 OP_CF(ENDLOOP, ENDLOOP);
346 case SM4_OPCODE_BREAK:
347 ureg_BRK(ureg);
348 break;
349 case SM4_OPCODE_BREAKC:
350 // XXX: can test_nz apply to this?!
351 ureg_BREAKC(ureg, _src(0));
352 break;
353 case SM4_OPCODE_CONTINUE:
354 ureg_CONT(ureg);
355 break;
356 case SM4_OPCODE_CONTINUEC:
357 // XXX: can test_nz apply to this?!
358 ureg_IF(ureg, _src(0), &label);
359 ureg_CONT(ureg);
360 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
361 ureg_ENDIF(ureg);
362 break;
363 case SM4_OPCODE_SWITCH:
364 ureg_SWITCH(ureg, _src(0));
365 break;
366 case SM4_OPCODE_CASE:
367 ureg_CASE(ureg, _src(0));
368 break;
369 case SM4_OPCODE_DEFAULT:
370 ureg_DEFAULT(ureg);
371 break;
372 case SM4_OPCODE_ENDSWITCH:
373 ureg_ENDSWITCH(ureg);
374 break;
375 case SM4_OPCODE_CALL:
376 ureg_CAL(ureg, &label);
377 label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
378 break;
379 case SM4_OPCODE_LABEL:
380 if(in_sub)
381 ureg_ENDSUB(ureg);
382 else
383 ureg_END(ureg);
384 ureg_BGNSUB(ureg);
385 in_sub = true;
386 break;
387 case SM4_OPCODE_RET:
388 if(in_sub || insn_num != (program.insns.size() - 1))
389 ureg_RET(ureg);
390 break;
391 case SM4_OPCODE_RETC:
392 ureg_IF(ureg, _src(0), &label);
393 if(insn->insn.test_nz)
394 ureg_RET(ureg);
395 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
396 if(!insn->insn.test_nz)
397 {
398 ureg_ELSE(ureg, &label);
399 ureg_RET(ureg);
400 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
401 }
402 ureg_ENDIF(ureg);
403 break;
404 OP_CF(ELSE, ELSE);
405 case SM4_OPCODE_ENDIF:
406 ureg_ENDIF(ureg);
407 break;
408 case SM4_OPCODE_IF:
409 if(insn->insn.test_nz)
410 {
411 ureg_IF(ureg, _src(0), &label);
412 label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
413 }
414 else
415 {
416 unsigned linked = program.cf_insn_linked[insn_num];
417 if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
418 {
419 ureg_IF(ureg, _src(0), &label);
420 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
421 ureg_ELSE(ureg, &label);
422 label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
423 }
424 else
425 {
426 /* we have to swap the branches in this case (fun!)
427 * TODO: maybe just emit a SEQ 0?
428 * */
429 unsigned endif = program.cf_insn_linked[linked];
430
431 ureg_IF(ureg, _src(0), &label);
432 label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
433
434 translate_insns(linked + 1, endif);
435
436 sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
437 ureg_ELSE(ureg, &label);
438 label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
439
440 translate_insns(insn_num + 1, linked);
441
442 insn_num = endif - 1;
443 goto next;
444 }
445 }
446 break;
447 case SM4_OPCODE_RESINFO:
448 // TODO: return type
449 ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
450 break;
451 // TODO: sample index, texture offset
452 case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg
453 ureg_LOAD(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
454 break;
455 case SM4_OPCODE_LD_MS:
456 ureg_LOAD_MS(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
457 break;
458 case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
459 ureg_SAMPLE(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
460 break;
461 case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
462 ureg_SAMPLE_B(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
463 break;
464 case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
465 ureg_SAMPLE_C(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
466 break;
467 case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
468 ureg_SAMPLE_C_LZ(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
469 break;
470 case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
471 ureg_SAMPLE_D(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4), _src(5));
472 break;
473 case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
474 {
475 struct ureg_dst tmp = _tmp();
476 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
477 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
478 ureg_SAMPLE_L(ureg, _dst(), ureg_src(tmp), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
479 break;
480 }
481 default:
482 ok = false;
483 break;
484 }
485
486 if(!ok && !avoid_int)
487 {
488 ok = true;
489 switch(insn->opcode)
490 {
491 // integer
492 OP1_(ITOF, I2F);
493 OP1_(FTOI, F2I);
494 OP2_(IADD, UADD);
495 OP1(INEG);
496 OP2_(IMUL, UMUL);
497 OP3_(IMAD, UMAD);
498 OP2_(ISHL, SHL);
499 OP2_(ISHR, ISHR);
500 OP2(IMIN);
501 OP2(IMAX);
502 OP2_(ILT, ISLT);
503 OP2_(IGE, ISGE);
504 OP2_(IEQ, USEQ);
505 OP2_(INE, USNE);
506
507 // unsigned
508 OP1_(UTOF, U2F);
509 OP1_(FTOU, F2U);
510 OP2(UMUL);
511 OP3(UMAD);
512 OP2(UMIN);
513 OP2(UMAX);
514 OP2_(ULT, USLT);
515 OP2_(UGE, USGE);
516 OP2(USHR);
517
518 case SM4_OPCODE_UDIV:
519 {
520 struct ureg_dst q = _dst(0);
521 struct ureg_dst r = _dst(1);
522 struct ureg_src a = _src(2);
523 struct ureg_src b = _src(3);
524 if(q.File != TGSI_FILE_NULL)
525 ureg_UDIV(ureg, q, a, b);
526 if(r.File != TGSI_FILE_NULL)
527 ureg_UMOD(ureg, r, a, b);
528 break;
529 }
530 default:
531 ok = false;
532 }
533 }
534
535 if(!ok && avoid_int)
536 {
537 ok = true;
538 switch(insn->opcode)
539 {
540 case SM4_OPCODE_ITOF:
541 case SM4_OPCODE_UTOF:
542 break;
543 OP1_(FTOI, TRUNC);
544 OP1_(FTOU, FLR);
545 // integer
546 OP2_(IADD, ADD);
547 OP2_(IMUL, MUL);
548 OP3_(IMAD, MAD);
549 OP2_(MIN, MIN);
550 OP2_(MAX, MAX);
551 OP2_(ILT, SLT);
552 OP2_(IGE, SGE);
553 OP2_(IEQ, SEQ);
554 OP2_(INE, SNE);
555
556 // unsigned
557 OP2_(UMUL, MUL);
558 OP3_(UMAD, MAD);
559 OP2_(UMIN, MIN);
560 OP2_(UMAX, MAX);
561 OP2_(ULT, SLT);
562 OP2_(UGE, SGE);
563
564 case SM4_OPCODE_INEG:
565 ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
566 break;
567 case SM4_OPCODE_ISHL:
568 {
569 struct ureg_dst d = _dst();
570 struct ureg_dst t = _tmp(d);
571 ureg_EX2(ureg, t, _src(2));
572 ureg_MUL(ureg, d, ureg_src(t), _src(1));
573 break;
574 }
575 case SM4_OPCODE_ISHR:
576 case SM4_OPCODE_USHR:
577 {
578 struct ureg_dst d = _dst();
579 struct ureg_dst t = _tmp(d);
580 ureg_EX2(ureg, t, ureg_negate(_src(2)));
581 ureg_MUL(ureg, t, ureg_src(t), _src(1));
582 ureg_FLR(ureg, d, ureg_src(t));
583 break;
584 }
585 case SM4_OPCODE_UDIV:
586 {
587 struct ureg_dst q = _dst(0);
588 struct ureg_dst r = _dst(1);
589 struct ureg_src a = _src(2);
590 struct ureg_src b = _src(3);
591 struct ureg_dst f = _tmp();
592 ureg_DIV(ureg, f, a, b);
593 if(q.File != TGSI_FILE_NULL)
594 ureg_FLR(ureg, q, ureg_src(f));
595 if(r.File != TGSI_FILE_NULL)
596 {
597 ureg_FRC(ureg, f, ureg_src(f));
598 ureg_MUL(ureg, r, ureg_src(f), b);
599 }
600 break;
601 }
602 default:
603 ok = false;
604 }
605 }
606
607 check(ok);
608
609 if(!insn_tmps.empty())
610 {
611 for(unsigned i = 0; i < insn_tmps.size(); ++i)
612 ureg_release_temporary(ureg, insn_tmps[i]);
613 insn_tmps.clear();
614 }
615 next:;
616 }
617 }
618
do_translatesm4_to_tgsi_converter619 void* do_translate()
620 {
621 unsigned processor;
622 switch(program.version.type)
623 {
624 case 0:
625 processor = TGSI_PROCESSOR_FRAGMENT;
626 break;
627 case 1:
628 processor = TGSI_PROCESSOR_VERTEX;
629 break;
630 case 2:
631 processor = TGSI_PROCESSOR_GEOMETRY;
632 break;
633 default:
634 fail("Tessellation and compute shaders not yet supported");
635 return 0;
636 }
637
638 if(!sm4_link_cf_insns(program))
639 fail("Malformed control flow");
640 if(!sm4_find_labels(program))
641 fail("Failed to locate labels");
642
643 ureg = ureg_create(processor);
644
645 in_sub = false;
646
647 sm4_to_tgsi_insn_num.resize(program.insns.size());
648 for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
649 {
650 sm4_dcl& dcl = *program.dcls[insn_num];
651 int idx = -1;
652 if(dcl.op.get() && dcl.op->is_index_simple(0))
653 idx = dcl.op->indices[0].disp;
654 switch(dcl.opcode)
655 {
656 case SM4_OPCODE_DCL_GLOBAL_FLAGS:
657 break;
658 case SM4_OPCODE_DCL_TEMPS:
659 for(unsigned i = 0; i < dcl.num; ++i)
660 temps.push_back(ureg_DECL_temporary(ureg));
661 break;
662 case SM4_OPCODE_DCL_INPUT:
663 check(idx >= 0);
664 if(processor == TGSI_PROCESSOR_VERTEX)
665 {
666 if(inputs.size() <= (unsigned)idx)
667 inputs.resize(idx + 1);
668 inputs[idx] = ureg_DECL_vs_input(ureg, idx);
669 }
670 else if(processor == TGSI_PROCESSOR_GEOMETRY)
671 {
672 // TODO: is this correct?
673 unsigned gsidx = dcl.op->indices[1].disp;
674 if(inputs.size() <= (unsigned)gsidx)
675 inputs.resize(gsidx + 1);
676 inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx);
677 }
678 else
679 check(0);
680 break;
681 case SM4_OPCODE_DCL_INPUT_PS:
682 check(idx >= 0);
683 if(inputs.size() <= (unsigned)idx)
684 inputs.resize(idx + 1);
685 inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid);
686 break;
687 case SM4_OPCODE_DCL_OUTPUT:
688 check(idx >= 0);
689 if(outputs.size() <= (unsigned)idx)
690 outputs.resize(idx + 1);
691 if(processor == TGSI_PROCESSOR_FRAGMENT)
692 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
693 else
694 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
695 break;
696 case SM4_OPCODE_DCL_INPUT_SIV:
697 case SM4_OPCODE_DCL_INPUT_SGV:
698 case SM4_OPCODE_DCL_INPUT_PS_SIV:
699 case SM4_OPCODE_DCL_INPUT_PS_SGV:
700 check(idx >= 0);
701 if(inputs.size() <= (unsigned)idx)
702 inputs.resize(idx + 1);
703 // TODO: is this correct?
704 inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
705 break;
706 case SM4_OPCODE_DCL_OUTPUT_SIV:
707 case SM4_OPCODE_DCL_OUTPUT_SGV:
708 check(idx >= 0);
709 if(outputs.size() <= (unsigned)idx)
710 outputs.resize(idx + 1);
711 check(sm4_to_pipe_sv[dcl.sv] >= 0);
712 outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
713 break;
714 case SM4_OPCODE_DCL_RESOURCE:
715 check(idx >= 0);
716 if(targets.size() <= (unsigned)idx)
717 targets.resize(idx + 1);
718 switch(dcl.dcl_resource.target)
719 {
720 case SM4_TARGET_TEXTURE1D:
721 targets[idx].first = TGSI_TEXTURE_1D;
722 targets[idx].second = TGSI_TEXTURE_SHADOW1D;
723 break;
724 case SM4_TARGET_TEXTURE1DARRAY:
725 targets[idx].first = TGSI_TEXTURE_1D_ARRAY;
726 targets[idx].second = TGSI_TEXTURE_SHADOW1D_ARRAY;
727 break;
728 case SM4_TARGET_TEXTURE2D:
729 targets[idx].first = TGSI_TEXTURE_2D;
730 targets[idx].second = TGSI_TEXTURE_SHADOW2D;
731 break;
732 case SM4_TARGET_TEXTURE2DARRAY:
733 targets[idx].first = TGSI_TEXTURE_2D_ARRAY;
734 targets[idx].second = TGSI_TEXTURE_SHADOW2D_ARRAY;
735 break;
736 case SM4_TARGET_TEXTURE3D:
737 targets[idx].first = TGSI_TEXTURE_3D;
738 targets[idx].second = 0;
739 break;
740 case SM4_TARGET_TEXTURECUBE:
741 targets[idx].first = TGSI_TEXTURE_CUBE;
742 targets[idx].second = 0;
743 break;
744 default:
745 // HACK to make SimpleSample10 work
746 //check(0);
747 targets[idx].first = TGSI_TEXTURE_2D;
748 targets[idx].second = TGSI_TEXTURE_SHADOW2D;
749 break;
750 }
751 if(resources.size() <= (unsigned)idx)
752 resources.resize(idx + 1);
753 resources[idx] = ureg_DECL_sampler_view(
754 ureg, idx, targets[idx].first,
755 res_return_type(dcl.rrt.x),
756 res_return_type(dcl.rrt.y),
757 res_return_type(dcl.rrt.z),
758 res_return_type(dcl.rrt.w));
759 break;
760 case SM4_OPCODE_DCL_SAMPLER:
761 check(idx >= 0);
762 if(sampler_modes.size() <= (unsigned)idx)
763 sampler_modes.resize(idx + 1);
764 check(!dcl.dcl_sampler.mono);
765 sampler_modes[idx] = dcl.dcl_sampler.shadow;
766 if(samplers.size() <= (unsigned)idx)
767 samplers.resize(idx + 1);
768 samplers[idx] = ureg_DECL_sampler(ureg, idx);
769 break;
770 case SM4_OPCODE_DCL_CONSTANT_BUFFER:
771 check(dcl.op->num_indices == 2);
772 check(dcl.op->is_index_simple(0));
773 check(dcl.op->is_index_simple(1));
774 idx = dcl.op->indices[0].disp;
775 ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
776 break;
777 case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
778 ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]);
779 break;
780 case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
781 ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]);
782 break;
783 case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
784 ureg_property_gs_max_vertices(ureg, dcl.num);
785 break;
786 default:
787 check(0);
788 }
789 }
790
791 translate_insns(0, program.insns.size());
792 sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
793 if(in_sub)
794 ureg_ENDSUB(ureg);
795 else
796 ureg_END(ureg);
797
798 for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
799 ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
800
801 const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
802 ureg_destroy(ureg);
803 return (void*)tokens;
804 }
805
translatesm4_to_tgsi_converter806 void* translate()
807 {
808 try
809 {
810 return do_translate();
811 }
812 catch(const char*)
813 {
814 return 0;
815 }
816 }
817 };
818
sm4_to_tgsi(struct sm4_program & program)819 void* sm4_to_tgsi(struct sm4_program& program)
820 {
821 sm4_to_tgsi_converter conv(program);
822 return conv.translate();
823 }
824
sm4_to_tgsi_linkage_only(struct sm4_program & prog)825 void* sm4_to_tgsi_linkage_only(struct sm4_program& prog)
826 {
827 struct ureg_program* ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
828
829 uint64_t already = 0;
830 for(unsigned n = 0, i = 0; i < prog.num_params_out; ++i)
831 {
832 unsigned sn, si;
833
834 if(already & (1ULL << prog.params_out[i].Register))
835 continue;
836 already |= 1ULL << prog.params_out[i].Register;
837
838 switch(prog.params_out[i].SystemValueType)
839 {
840 case D3D_NAME_UNDEFINED:
841 sn = TGSI_SEMANTIC_GENERIC;
842 si = n++;
843 break;
844 case D3D_NAME_CULL_DISTANCE:
845 case D3D_NAME_CLIP_DISTANCE:
846 // FIXME
847 sn = 0;
848 si = prog.params_out[i].SemanticIndex;
849 assert(0);
850 break;
851 default:
852 continue;
853 }
854
855 ureg_DECL_output(ureg, sn, si);
856 }
857
858 const struct tgsi_token* tokens = ureg_get_tokens(ureg, 0);
859 ureg_destroy(ureg);
860 return (void*)tokens;
861 }
862