1 /**************************************************************************
2  *
3  * Copyright 2010 Luca Barbieri
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial
15  * portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include <d3d11shader.h>
28 #include "d3d1xstutil.h"
29 #include "sm4.h"
30 #include "tgsi/tgsi_ureg.h"
31 #include <vector>
32 
/* Validation helpers used throughout the translator.
 *
 * The active variant maps both macros onto assert(): a failed check aborts in
 * debug builds, and both macros expand to nothing when NDEBUG is defined.
 * Switching the condition to 0 selects the alternative variant, which throws a
 * const char* (the stringified condition for check(), the message for fail()).
 */
#if 1
#define check(x) assert(x)
#define fail(x) assert(0 && (x))
#else
#define check(x) do {if(!(x)) throw(#x);} while(0)
#define fail(x) throw(x)
#endif
40 
/* One entry of the SM4 -> TGSI interpolation mapping table. */
struct tgsi_interpolation
{
	unsigned interpolation; // a TGSI_INTERPOLATE_* value
	bool centroid; // whether the input is declared with the centroid flag set
};
46 
47 static tgsi_interpolation sm4_to_pipe_interpolation[] =
48 {
49 	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */
50 	{TGSI_INTERPOLATE_CONSTANT, false},
51 	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */
52 	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */
53 	{TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */
54 	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */
55 
56 	// Added in D3D10.1
57 	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */
58 	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */
59 };
60 
61 static int sm4_to_pipe_sv[] =
62 {
63 	-1,
64 	TGSI_SEMANTIC_POSITION,
65 	-1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
66 	-1, /*TGSI_SEMANTIC_CULL_DISTANCE */
67 	-1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
68 	-1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
69 	-1, /*TGSI_SEMANTIC_VERTEXID,*/
70 	TGSI_SEMANTIC_PRIMID,
71 	TGSI_SEMANTIC_INSTANCEID,
72 	TGSI_SEMANTIC_FACE,
73 	-1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
74 };
75 
76 struct sm4_to_tgsi_converter
77 {
78 	struct ureg_program* ureg;
79 	std::vector<struct ureg_dst> temps;
80 	std::vector<struct ureg_dst> outputs;
81 	std::vector<struct ureg_src> inputs;
82 	std::vector<struct ureg_src> resources;
83 	std::vector<struct ureg_src> samplers;
84 	std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
85 	std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
86 	std::vector<std::pair<unsigned, unsigned> > loops;
87 	sm4_insn* insn;
88 	struct sm4_program& program;
89 	std::vector<unsigned> sm4_to_tgsi_insn_num;
90 	std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
91 	bool in_sub;
92 	bool avoid_txf;
93 	bool avoid_int;
94 
sm4_to_tgsi_convertersm4_to_tgsi_converter95 	sm4_to_tgsi_converter(struct sm4_program& program)
96 	: program(program)
97 	{
98 		avoid_txf = true;
99 		avoid_int = false;
100 	}
101 
_regsm4_to_tgsi_converter102 	struct ureg_dst _reg(sm4_op& op)
103 	{
104 		switch(op.file)
105 		{
106 		case SM4_FILE_NULL:
107 		{
108 			struct ureg_dst d;
109 			memset(&d, 0, sizeof(d));
110 			d.File = TGSI_FILE_NULL;
111 			return d;
112 		}
113 		case SM4_FILE_TEMP:
114 			check(op.has_simple_index());
115 			check(op.indices[0].disp < temps.size());
116 			return temps[op.indices[0].disp];
117 		case SM4_FILE_OUTPUT:
118 			check(op.has_simple_index());
119 			check(op.indices[0].disp < outputs.size());
120 			return outputs[op.indices[0].disp];
121 		default:
122 			check(0);
123 			return ureg_dst_undef();
124 		}
125 	}
126 
_dstsm4_to_tgsi_converter127 	struct ureg_dst _dst(unsigned i = 0)
128 	{
129 		check(i < insn->num_ops);
130 		sm4_op& op = *insn->ops[i];
131 		check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
132 		struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
133 		if(insn->insn.sat)
134 			d = ureg_saturate(d);
135 		return d;
136 	}
137 
_srcsm4_to_tgsi_converter138 	struct ureg_src _src(unsigned i)
139 	{
140 		check(i < insn->num_ops);
141 		sm4_op& op = *insn->ops[i];
142 		struct ureg_src s;
143 		switch(op.file)
144 		{
145 		case SM4_FILE_IMMEDIATE32:
146 			s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
147 			break;
148 		case SM4_FILE_INPUT:
149 			check(op.is_index_simple(0));
150 			check(op.num_indices == 1 || op.num_indices == 2);
151 			// TODO: is this correct, or are incorrectly swapping the two indices in the GS case?
152 			check(op.indices[op.num_indices - 1].disp < inputs.size());
153 			s = inputs[op.indices[op.num_indices - 1].disp];
154 			if(op.num_indices == 2)
155 			{
156 				s.Dimension = 1;
157 				s.DimensionIndex = op.indices[0].disp;
158 			}
159 			break;
160 		case SM4_FILE_CONSTANT_BUFFER:
161 			// TODO: indirect addressing
162 			check(op.num_indices == 2);
163 			check(op.is_index_simple(0));
164 			check(op.is_index_simple(1));
165 			s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
166 			s.Dimension = 1;
167 			s.DimensionIndex = op.indices[0].disp;
168 			break;
169 		default:
170 			s = ureg_src(_reg(op));
171 			break;
172 		}
173 		if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
174 			s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
175 		else
176 		{
177 			/* immediates are masked to show needed values */
178 			check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
179 		}
180 		if(op.abs)
181 			s = ureg_abs(s);
182 		if(op.neg)
183 			s = ureg_negate(s);
184 		return s;
185 	};
186 
_idxsm4_to_tgsi_converter187 	int _idx(sm4_file file, unsigned i = 0)
188 	{
189 		check(i < insn->num_ops);
190 		sm4_op& op = *insn->ops[i];
191 		check(op.file == file);
192 		check(op.has_simple_index());
193 		return (int)op.indices[0].disp;
194 	}
195 
tex_targetsm4_to_tgsi_converter196 	unsigned tex_target(unsigned resource, unsigned sampler)
197 	{
198 		unsigned shadow = sampler_modes[sampler];
199 		unsigned target = shadow ? targets[resource].second : targets[resource].first;
200 		check(target);
201 		return target;
202 	}
203 
res_return_typesm4_to_tgsi_converter204 	enum pipe_type res_return_type(unsigned type)
205 	{
206 		switch(type)
207 		{
208 		case D3D_RETURN_TYPE_UNORM: return PIPE_TYPE_UNORM;
209 		case D3D_RETURN_TYPE_SNORM: return PIPE_TYPE_SNORM;
210 		case D3D_RETURN_TYPE_SINT:  return PIPE_TYPE_SINT;
211 		case D3D_RETURN_TYPE_UINT:  return PIPE_TYPE_UINT;
212 		case D3D_RETURN_TYPE_FLOAT: return PIPE_TYPE_FLOAT;
213 		default:
214 			fail("invalid resource return type");
215 			return PIPE_TYPE_FLOAT;
216 		}
217 	}
218 
219 	std::vector<struct ureg_dst> insn_tmps;
220 
_tmpsm4_to_tgsi_converter221 	struct ureg_dst _tmp()
222 	{
223 		struct ureg_dst t = ureg_DECL_temporary(ureg);
224 		insn_tmps.push_back(t);
225 		return t;
226 	}
227 
_tmpsm4_to_tgsi_converter228 	struct ureg_dst _tmp(struct ureg_dst d)
229 	{
230 		if(d.File == TGSI_FILE_TEMPORARY)
231 			return d;
232 		else
233 			return ureg_writemask(_tmp(), d.WriteMask);
234 	}
235 
236 #define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
237 #define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
238 #define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
239 #define OP1(n) OP1_(n, n)
240 #define OP2(n) OP2_(n, n)
241 #define OP3(n) OP3_(n, n)
242 #define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break;
243 
translate_insnssm4_to_tgsi_converter244 	void translate_insns(unsigned begin, unsigned end)
245 	{
246 		for(unsigned insn_num = begin; insn_num < end; ++insn_num)
247 		{
248 			sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
249 			unsigned label;
250 			insn = program.insns[insn_num];
251 			bool ok;
252 			ok = true;
253 			switch(insn->opcode)
254 			{
255 			// trivial instructions
256 			case SM4_OPCODE_NOP:
257 				break;
258 			OP1(MOV);
259 
260 			// float
261 			OP2(ADD);
262 			OP2(MUL);
263 			OP3(MAD);
264 			OP2(DIV);
265 			OP1(FRC);
266 			OP1(RCP);
267 			OP2(MIN);
268 			OP2(MAX);
269 			OP2_(LT, SLT);
270 			OP2_(GE, SGE);
271 			OP2_(EQ, SEQ);
272 			OP2_(NE, SNE);
273 
274 			// bitwise
275 			OP1(NOT);
276 			OP2(AND);
277 			OP2(OR);
278 			OP2(XOR);
279 
280 			// special mathematical
281 			OP2(DP2);
282 			OP2(DP3);
283 			OP2(DP4);
284 			OP1(RSQ);
285 			OP1_(LOG, LG2);
286 			OP1_(EXP, EX2);
287 
288 			// rounding
289 			OP1_(ROUND_NE, ROUND);
290 			OP1_(ROUND_Z, TRUNC);
291 			OP1_(ROUND_PI, CEIL);
292 			OP1_(ROUND_NI, FLR);
293 
294 			// cross-thread
295 			OP1_(DERIV_RTX, DDX);
296 			OP1_(DERIV_RTX_COARSE, DDX);
297 			OP1_(DERIV_RTX_FINE, DDX);
298 			OP1_(DERIV_RTY, DDY);
299 			OP1_(DERIV_RTY_COARSE, DDY);
300 			OP1_(DERIV_RTY_FINE, DDY);
301 			case SM4_OPCODE_EMIT:
302 				ureg_EMIT(ureg);
303 				break;
304 			case SM4_OPCODE_CUT:
305 				ureg_ENDPRIM(ureg);
306 				break;
307 			case SM4_OPCODE_EMITTHENCUT:
308 				ureg_EMIT(ureg);
309 				ureg_ENDPRIM(ureg);
310 				break;
311 
312 			// non-trivial instructions
313 			case SM4_OPCODE_MOVC:
314 				/* CMP checks for < 0, but MOVC checks for != 0
315 				 * but fortunately, x != 0 is equivalent to -abs(x) < 0
316 				 * XXX: can test_nz apply to this?!
317 				 */
318 				ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
319 				break;
320 			case SM4_OPCODE_SQRT:
321 			{
322 				struct ureg_dst d = _dst();
323 				struct ureg_dst t = _tmp(d);
324 				ureg_RSQ(ureg, t, _src(1));
325 				ureg_RCP(ureg, d, ureg_src(t));
326 				break;
327 			}
328 			case SM4_OPCODE_SINCOS:
329 			{
330 				struct ureg_dst s = _dst(0);
331 				struct ureg_dst c = _dst(1);
332 				struct ureg_src v = _src(2);
333 				if(s.File != TGSI_FILE_NULL)
334 					ureg_SIN(ureg, s, v);
335 				if(c.File != TGSI_FILE_NULL)
336 					ureg_COS(ureg, c, v);
337 				break;
338 			}
339 
340 			// control flow
341 			case SM4_OPCODE_DISCARD:
342 				ureg_KIL(ureg, _src(0));
343 				break;
344 			OP_CF(LOOP, BGNLOOP);
345 			OP_CF(ENDLOOP, ENDLOOP);
346 			case SM4_OPCODE_BREAK:
347 				ureg_BRK(ureg);
348 				break;
349 			case SM4_OPCODE_BREAKC:
350 				// XXX: can test_nz apply to this?!
351 				ureg_BREAKC(ureg, _src(0));
352 				break;
353 			case SM4_OPCODE_CONTINUE:
354 				ureg_CONT(ureg);
355 				break;
356 			case SM4_OPCODE_CONTINUEC:
357 				// XXX: can test_nz apply to this?!
358 				ureg_IF(ureg, _src(0), &label);
359 				ureg_CONT(ureg);
360 				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
361 				ureg_ENDIF(ureg);
362 				break;
363 			case SM4_OPCODE_SWITCH:
364 				ureg_SWITCH(ureg, _src(0));
365 				break;
366 			case SM4_OPCODE_CASE:
367 				ureg_CASE(ureg, _src(0));
368 				break;
369 			case SM4_OPCODE_DEFAULT:
370 				ureg_DEFAULT(ureg);
371 				break;
372 			case SM4_OPCODE_ENDSWITCH:
373 				ureg_ENDSWITCH(ureg);
374 				break;
375 			case SM4_OPCODE_CALL:
376 				ureg_CAL(ureg, &label);
377 				label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
378 				break;
379 			case SM4_OPCODE_LABEL:
380 				if(in_sub)
381 					ureg_ENDSUB(ureg);
382 				else
383 					ureg_END(ureg);
384 				ureg_BGNSUB(ureg);
385 				in_sub = true;
386 				break;
387 			case SM4_OPCODE_RET:
388 				if(in_sub || insn_num != (program.insns.size() - 1))
389 					ureg_RET(ureg);
390 				break;
391 			case SM4_OPCODE_RETC:
392 				ureg_IF(ureg, _src(0), &label);
393 				if(insn->insn.test_nz)
394 					ureg_RET(ureg);
395 				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
396 				if(!insn->insn.test_nz)
397 				{
398 					ureg_ELSE(ureg, &label);
399 					ureg_RET(ureg);
400 					ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
401 				}
402 				ureg_ENDIF(ureg);
403 				break;
404 			OP_CF(ELSE, ELSE);
405 			case SM4_OPCODE_ENDIF:
406 				ureg_ENDIF(ureg);
407 				break;
408 			case SM4_OPCODE_IF:
409 				if(insn->insn.test_nz)
410 				{
411 					ureg_IF(ureg, _src(0), &label);
412 					label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
413 				}
414 				else
415 				{
416 					unsigned linked = program.cf_insn_linked[insn_num];
417 					if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
418 					{
419 						ureg_IF(ureg, _src(0), &label);
420 						ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
421 						ureg_ELSE(ureg, &label);
422 						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
423 					}
424 					else
425 					{
426 						/* we have to swap the branches in this case (fun!)
427 						 * TODO: maybe just emit a SEQ 0?
428 						 * */
429 						unsigned endif = program.cf_insn_linked[linked];
430 
431 						ureg_IF(ureg, _src(0), &label);
432 						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
433 
434 						translate_insns(linked + 1, endif);
435 
436 						sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
437 						ureg_ELSE(ureg, &label);
438 						label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
439 
440 						translate_insns(insn_num + 1, linked);
441 
442 						insn_num = endif - 1;
443 						goto next;
444 					}
445 				}
446 				break;
447 			case SM4_OPCODE_RESINFO:
448 				// TODO: return type
449 				ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
450 				break;
451 			// TODO: sample index, texture offset
452 			case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg
453 				ureg_LOAD(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
454 				break;
455 			case SM4_OPCODE_LD_MS:
456 				ureg_LOAD_MS(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
457 				break;
458 			case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
459 				ureg_SAMPLE(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
460 				break;
461 			case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
462 				ureg_SAMPLE_B(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
463 				break;
464 			case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
465 				ureg_SAMPLE_C(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
466 				break;
467 			case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
468 				ureg_SAMPLE_C_LZ(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
469 				break;
470 			case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
471 				ureg_SAMPLE_D(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4), _src(5));
472 				break;
473 			case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
474 			{
475 				struct ureg_dst tmp = _tmp();
476 				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
477 				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
478 				ureg_SAMPLE_L(ureg, _dst(), ureg_src(tmp), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
479 				break;
480 			}
481 			default:
482 				ok = false;
483 				break;
484 			}
485 
486 			if(!ok && !avoid_int)
487 			{
488 				ok = true;
489 				switch(insn->opcode)
490 				{
491 				// integer
492 				OP1_(ITOF, I2F);
493 				OP1_(FTOI, F2I);
494 				OP2_(IADD, UADD);
495 				OP1(INEG);
496 				OP2_(IMUL, UMUL);
497 				OP3_(IMAD, UMAD);
498 				OP2_(ISHL, SHL);
499 				OP2_(ISHR, ISHR);
500 				OP2(IMIN);
501 				OP2(IMAX);
502 				OP2_(ILT, ISLT);
503 				OP2_(IGE, ISGE);
504 				OP2_(IEQ, USEQ);
505 				OP2_(INE, USNE);
506 
507 				// unsigned
508 				OP1_(UTOF, U2F);
509 				OP1_(FTOU, F2U);
510 				OP2(UMUL);
511 				OP3(UMAD);
512 				OP2(UMIN);
513 				OP2(UMAX);
514 				OP2_(ULT, USLT);
515 				OP2_(UGE, USGE);
516 				OP2(USHR);
517 
518 				case SM4_OPCODE_UDIV:
519 				{
520 					struct ureg_dst q = _dst(0);
521 					struct ureg_dst r = _dst(1);
522 					struct ureg_src a = _src(2);
523 					struct ureg_src b = _src(3);
524 					if(q.File != TGSI_FILE_NULL)
525 						ureg_UDIV(ureg, q, a, b);
526 					if(r.File != TGSI_FILE_NULL)
527 						ureg_UMOD(ureg, r, a, b);
528 					break;
529 				}
530 				default:
531 					ok = false;
532 				}
533 			}
534 
535 			if(!ok && avoid_int)
536 			{
537 				ok = true;
538 				switch(insn->opcode)
539 				{
540 				case SM4_OPCODE_ITOF:
541 				case SM4_OPCODE_UTOF:
542 					break;
543 				OP1_(FTOI, TRUNC);
544 				OP1_(FTOU, FLR);
545 				// integer
546 				OP2_(IADD, ADD);
547 				OP2_(IMUL, MUL);
548 				OP3_(IMAD, MAD);
549 				OP2_(MIN, MIN);
550 				OP2_(MAX, MAX);
551 				OP2_(ILT, SLT);
552 				OP2_(IGE, SGE);
553 				OP2_(IEQ, SEQ);
554 				OP2_(INE, SNE);
555 
556 				// unsigned
557 				OP2_(UMUL, MUL);
558 				OP3_(UMAD, MAD);
559 				OP2_(UMIN, MIN);
560 				OP2_(UMAX, MAX);
561 				OP2_(ULT, SLT);
562 				OP2_(UGE, SGE);
563 
564 				case SM4_OPCODE_INEG:
565 					ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
566 					break;
567 				case SM4_OPCODE_ISHL:
568 				{
569 					struct ureg_dst d = _dst();
570 					struct ureg_dst t = _tmp(d);
571 					ureg_EX2(ureg, t, _src(2));
572 					ureg_MUL(ureg, d, ureg_src(t), _src(1));
573 					break;
574 				}
575 				case SM4_OPCODE_ISHR:
576 				case SM4_OPCODE_USHR:
577 				{
578 					struct ureg_dst d = _dst();
579 					struct ureg_dst t = _tmp(d);
580 					ureg_EX2(ureg, t, ureg_negate(_src(2)));
581 					ureg_MUL(ureg, t, ureg_src(t), _src(1));
582 					ureg_FLR(ureg, d, ureg_src(t));
583 					break;
584 				}
585 				case SM4_OPCODE_UDIV:
586 				{
587 					struct ureg_dst q = _dst(0);
588 					struct ureg_dst r = _dst(1);
589 					struct ureg_src a = _src(2);
590 					struct ureg_src b = _src(3);
591 					struct ureg_dst f = _tmp();
592 					ureg_DIV(ureg, f, a, b);
593 					if(q.File != TGSI_FILE_NULL)
594 						ureg_FLR(ureg, q, ureg_src(f));
595 					if(r.File != TGSI_FILE_NULL)
596 					{
597 						ureg_FRC(ureg, f, ureg_src(f));
598 						ureg_MUL(ureg, r, ureg_src(f), b);
599 					}
600 					break;
601 				}
602 				default:
603 					ok = false;
604 				}
605 			}
606 
607 			check(ok);
608 
609 			if(!insn_tmps.empty())
610 			{
611 				for(unsigned i = 0; i < insn_tmps.size(); ++i)
612 					ureg_release_temporary(ureg, insn_tmps[i]);
613 				insn_tmps.clear();
614 			}
615 next:;
616 		}
617 	}
618 
do_translatesm4_to_tgsi_converter619 	void* do_translate()
620 	{
621 		unsigned processor;
622 		switch(program.version.type)
623 		{
624 		case 0:
625 			processor = TGSI_PROCESSOR_FRAGMENT;
626 			break;
627 		case 1:
628 			processor = TGSI_PROCESSOR_VERTEX;
629 			break;
630 		case 2:
631 			processor = TGSI_PROCESSOR_GEOMETRY;
632 			break;
633 		default:
634 			fail("Tessellation and compute shaders not yet supported");
635 			return 0;
636 		}
637 
638 		if(!sm4_link_cf_insns(program))
639 			fail("Malformed control flow");
640 		if(!sm4_find_labels(program))
641 			fail("Failed to locate labels");
642 
643 		ureg = ureg_create(processor);
644 
645 		in_sub = false;
646 
647 		sm4_to_tgsi_insn_num.resize(program.insns.size());
648 		for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
649 		{
650 			sm4_dcl& dcl = *program.dcls[insn_num];
651 			int idx = -1;
652 			if(dcl.op.get() && dcl.op->is_index_simple(0))
653 				idx = dcl.op->indices[0].disp;
654 			switch(dcl.opcode)
655 			{
656 			case SM4_OPCODE_DCL_GLOBAL_FLAGS:
657 				break;
658 			case SM4_OPCODE_DCL_TEMPS:
659 				for(unsigned i = 0; i < dcl.num; ++i)
660 					temps.push_back(ureg_DECL_temporary(ureg));
661 				break;
662 			case SM4_OPCODE_DCL_INPUT:
663 				check(idx >= 0);
664 				if(processor == TGSI_PROCESSOR_VERTEX)
665 				{
666 					if(inputs.size() <= (unsigned)idx)
667 						inputs.resize(idx + 1);
668 					inputs[idx] = ureg_DECL_vs_input(ureg, idx);
669 				}
670 				else if(processor == TGSI_PROCESSOR_GEOMETRY)
671 				{
672 					// TODO: is this correct?
673 					unsigned gsidx = dcl.op->indices[1].disp;
674 					if(inputs.size() <= (unsigned)gsidx)
675 						inputs.resize(gsidx + 1);
676 					inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx);
677 				}
678 				else
679 					check(0);
680 				break;
681 			case SM4_OPCODE_DCL_INPUT_PS:
682 				check(idx >= 0);
683 				if(inputs.size() <= (unsigned)idx)
684 					inputs.resize(idx + 1);
685 				inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid);
686 				break;
687 			case SM4_OPCODE_DCL_OUTPUT:
688 				check(idx >= 0);
689 				if(outputs.size() <= (unsigned)idx)
690 					outputs.resize(idx + 1);
691 				if(processor == TGSI_PROCESSOR_FRAGMENT)
692 					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
693 				else
694 					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
695 				break;
696 			case SM4_OPCODE_DCL_INPUT_SIV:
697 			case SM4_OPCODE_DCL_INPUT_SGV:
698 			case SM4_OPCODE_DCL_INPUT_PS_SIV:
699 			case SM4_OPCODE_DCL_INPUT_PS_SGV:
700 				check(idx >= 0);
701 				if(inputs.size() <= (unsigned)idx)
702 					inputs.resize(idx + 1);
703 				// TODO: is this correct?
704 				inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
705 				break;
706 			case SM4_OPCODE_DCL_OUTPUT_SIV:
707 			case SM4_OPCODE_DCL_OUTPUT_SGV:
708 				check(idx >= 0);
709 				if(outputs.size() <= (unsigned)idx)
710 					outputs.resize(idx + 1);
711 				check(sm4_to_pipe_sv[dcl.sv] >= 0);
712 				outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
713 				break;
714 			case SM4_OPCODE_DCL_RESOURCE:
715 				check(idx >= 0);
716 				if(targets.size() <= (unsigned)idx)
717 					targets.resize(idx + 1);
718 				switch(dcl.dcl_resource.target)
719 				{
720 				case SM4_TARGET_TEXTURE1D:
721 					targets[idx].first = TGSI_TEXTURE_1D;
722 					targets[idx].second = TGSI_TEXTURE_SHADOW1D;
723 					break;
724 				case SM4_TARGET_TEXTURE1DARRAY:
725 					targets[idx].first = TGSI_TEXTURE_1D_ARRAY;
726 					targets[idx].second = TGSI_TEXTURE_SHADOW1D_ARRAY;
727 					break;
728 				case SM4_TARGET_TEXTURE2D:
729 					targets[idx].first = TGSI_TEXTURE_2D;
730 					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
731 					break;
732 				case SM4_TARGET_TEXTURE2DARRAY:
733 					targets[idx].first = TGSI_TEXTURE_2D_ARRAY;
734 					targets[idx].second = TGSI_TEXTURE_SHADOW2D_ARRAY;
735 					break;
736 				case SM4_TARGET_TEXTURE3D:
737 					targets[idx].first = TGSI_TEXTURE_3D;
738 					targets[idx].second = 0;
739 					break;
740 				case SM4_TARGET_TEXTURECUBE:
741 					targets[idx].first = TGSI_TEXTURE_CUBE;
742 					targets[idx].second = 0;
743 					break;
744 				default:
745 					// HACK to make SimpleSample10 work
746 					//check(0);
747 					targets[idx].first = TGSI_TEXTURE_2D;
748 					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
749 					break;
750 				}
751 				if(resources.size() <= (unsigned)idx)
752 					resources.resize(idx + 1);
753 				resources[idx] = ureg_DECL_sampler_view(
754                                    ureg, idx, targets[idx].first,
755                                    res_return_type(dcl.rrt.x),
756                                    res_return_type(dcl.rrt.y),
757                                    res_return_type(dcl.rrt.z),
758                                    res_return_type(dcl.rrt.w));
759 				break;
760 			case SM4_OPCODE_DCL_SAMPLER:
761 				check(idx >= 0);
762 				if(sampler_modes.size() <= (unsigned)idx)
763 					sampler_modes.resize(idx + 1);
764 				check(!dcl.dcl_sampler.mono);
765 				sampler_modes[idx] = dcl.dcl_sampler.shadow;
766 				if(samplers.size() <= (unsigned)idx)
767 					samplers.resize(idx + 1);
768 				samplers[idx] = ureg_DECL_sampler(ureg, idx);
769 				break;
770 			case SM4_OPCODE_DCL_CONSTANT_BUFFER:
771 				check(dcl.op->num_indices == 2);
772 				check(dcl.op->is_index_simple(0));
773 				check(dcl.op->is_index_simple(1));
774 				idx = dcl.op->indices[0].disp;
775 				ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
776 				break;
777 			case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
778 				ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]);
779 				break;
780 			case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
781 				ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]);
782 				break;
783 			case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
784 				ureg_property_gs_max_vertices(ureg, dcl.num);
785 				break;
786 			default:
787 				check(0);
788 			}
789 		}
790 
791 		translate_insns(0, program.insns.size());
792 		sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
793 		if(in_sub)
794 			ureg_ENDSUB(ureg);
795 		else
796 			ureg_END(ureg);
797 
798 		for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
799 			ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
800 
801 		const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
802 		ureg_destroy(ureg);
803 		return (void*)tokens;
804 	}
805 
translatesm4_to_tgsi_converter806 	void* translate()
807 	{
808 		try
809 		{
810 			return do_translate();
811 		}
812 		catch(const char*)
813 		{
814 			return 0;
815 		}
816 	}
817 };
818 
sm4_to_tgsi(struct sm4_program & program)819 void* sm4_to_tgsi(struct sm4_program& program)
820 {
821 	sm4_to_tgsi_converter conv(program);
822 	return conv.translate();
823 }
824 
sm4_to_tgsi_linkage_only(struct sm4_program & prog)825 void* sm4_to_tgsi_linkage_only(struct sm4_program& prog)
826 {
827 	struct ureg_program* ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
828 
829 	uint64_t already = 0;
830 	for(unsigned n = 0, i = 0; i < prog.num_params_out; ++i)
831 	{
832 		unsigned sn, si;
833 
834 		if(already & (1ULL << prog.params_out[i].Register))
835 			continue;
836 		already |= 1ULL << prog.params_out[i].Register;
837 
838 		switch(prog.params_out[i].SystemValueType)
839 		{
840 		case D3D_NAME_UNDEFINED:
841 			sn = TGSI_SEMANTIC_GENERIC;
842 			si = n++;
843 			break;
844 		case D3D_NAME_CULL_DISTANCE:
845 		case D3D_NAME_CLIP_DISTANCE:
846 			// FIXME
847 			sn = 0;
848 			si = prog.params_out[i].SemanticIndex;
849 			assert(0);
850 			break;
851 		default:
852 			continue;
853 		}
854 
855 		ureg_DECL_output(ureg, sn, si);
856 	}
857 
858 	const struct tgsi_token* tokens = ureg_get_tokens(ureg, 0);
859 	ureg_destroy(ureg);
860 	return (void*)tokens;
861 }
862