1 /*
2  * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3  * Copyright 2013 Christoph Bumiller
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "nine_shader.h"
25 
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30 
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "nir/tgsi_to_nir.h"
38 
/* Debug channel that DUMP() passes to _nine_debug_printf(). */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL; NULL is passed as the second argument
 * of _nine_debug_printf(). */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a per-instruction translation handler (see
 * sm1_op_info::handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Opcode-to-name helper used by sm1_dump_instruction(); defined later. */
static inline const char *d3dsio_to_string(unsigned opcode);
49 
50 
/* Shader type tags. NOTE(review): presumably the upper 16 bits of the SM1
 * version token -- confirm against the version parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the cond_labels[] and loop_labels[]/rL[]/loop_or_rep[] arrays in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Immediate value types; sm1_dump_immediate() switches on these. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Extra register file value for immediates, one past the last D3DSPR_*
 * file that has an entry in sm1_file_char[]. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operator codes. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the 8-bit per-opcode flags field. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX; sm1_dump_instruction() prints PROJECT as "p"
 * and anything else as "b". */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component (x, y, z, w in sm1_dump_writemask()). */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to the four TGSI_SWIZZLE_* arguments for the given components. */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicates component s of src into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier flags, shifted down to bit 0; tested as a bit mask in
 * sm1_dump_dst_param(). */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
100 
101 /*
102  * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
103  * BIAS    <= PS 1.4 (x-0.5)
104  * BIASNEG <= PS 1.4 (-(x-0.5))
105  * SIGN    <= PS 1.4 (2(x-0.5))
106  * SIGNNEG <= PS 1.4 (-2(x-0.5))
107  * COMP    <= PS 1.4 (1-x)
108  * X2       = PS 1.4 (2x)
109  * X2NEG    = PS 1.4 (-2x)
110  * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
111  * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
112  * ABS     >= SM 3.0 (abs(x))
113  * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
115  */
/* Source modifier codes shifted down by D3DSP_SRCMOD_SHIFT. They are used as
 * the indices of sm1_mod_str[]. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
130 
/* Printable name of each source modifier, indexed by NINED3DSPSM_* value.
 * sm1_dump_src_param() prints the entry followed by '(' whenever the
 * modifier is non-zero. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
148 
149 static void
sm1_dump_writemask(BYTE mask)150 sm1_dump_writemask(BYTE mask)
151 {
152     if (mask & 1) DUMP("x"); else DUMP("_");
153     if (mask & 2) DUMP("y"); else DUMP("_");
154     if (mask & 4) DUMP("z"); else DUMP("_");
155     if (mask & 8) DUMP("w"); else DUMP("_");
156 }
157 
158 static void
sm1_dump_swizzle(BYTE s)159 sm1_dump_swizzle(BYTE s)
160 {
161     char c[4] = { 'x', 'y', 'z', 'w' };
162     DUMP("%c%c%c%c",
163          c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
164 }
165 
/* One-letter tag for each D3DSPR_* register file, used by the dump helpers.
 * Note that 'D' is shared by ATTROUT and DEPTHOUT and 'c' by all four float
 * constant files. NINED3DSPR_IMMEDIATE has no entry. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
189 
190 static void
sm1_dump_reg(BYTE file,INT index)191 sm1_dump_reg(BYTE file, INT index)
192 {
193     switch (file) {
194     case D3DSPR_LOOP:
195         DUMP("aL");
196         break;
197     case D3DSPR_COLOROUT:
198         DUMP("oC%i", index);
199         break;
200     case D3DSPR_DEPTHOUT:
201         DUMP("oDepth");
202         break;
203     case D3DSPR_RASTOUT:
204         DUMP("oRast%i", index);
205         break;
206     case D3DSPR_CONSTINT:
207         DUMP("iconst[%i]", index);
208         break;
209     case D3DSPR_CONSTBOOL:
210         DUMP("bconst[%i]", index);
211         break;
212     default:
213         DUMP("%c%i", sm1_file_char[file], index);
214         break;
215     }
216 }
217 
/* Decoded source parameter of an SM1 instruction. */
struct sm1_src_param
{
    INT idx; /* register index within file */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file; /* D3DSPR_* register file, or NINED3DSPR_IMMEDIATE */
    BYTE swizzle; /* two bits per component; NINED3DSP_NOSWIZZLE is identity */
    BYTE mod; /* NINED3DSPSM_* source modifier */
    BYTE type; /* NINED3DSPTYPE_*; selects the imm member that is dumped */
    /* immediate payload, only dumped when file is NINED3DSPR_IMMEDIATE or for
     * the DEF/DEFI/DEFB opcodes */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;
};
/* Defined later in the file. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
235 
/* Decoded destination parameter of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx; /* register index within file */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file; /* D3DSPR_* register file */
    BYTE mask; /* NINED3DSP_WRITEMASK_* bits */
    BYTE mod; /* NINED3DSPDM_* flags */
    int8_t shift; /* sint4; dumped as "*2^shift" when > 0, "/2^-shift" when < 0 */
    BYTE type; /* NOTE(review): not read in the visible code -- confirm use */
};
246 
247 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)248 assert_replicate_swizzle(const struct ureg_src *reg)
249 {
250     assert(reg->SwizzleY == reg->SwizzleX &&
251            reg->SwizzleZ == reg->SwizzleX &&
252            reg->SwizzleW == reg->SwizzleX);
253 }
254 
255 static void
sm1_dump_immediate(const struct sm1_src_param * param)256 sm1_dump_immediate(const struct sm1_src_param *param)
257 {
258     switch (param->type) {
259     case NINED3DSPTYPE_FLOAT4:
260         DUMP("{ %f %f %f %f }",
261              param->imm.f[0], param->imm.f[1],
262              param->imm.f[2], param->imm.f[3]);
263         break;
264     case NINED3DSPTYPE_INT4:
265         DUMP("{ %i %i %i %i }",
266              param->imm.i[0], param->imm.i[1],
267              param->imm.i[2], param->imm.i[3]);
268         break;
269     case NINED3DSPTYPE_BOOL:
270         DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
271         break;
272     default:
273         assert(0);
274         break;
275     }
276 }
277 
278 static void
sm1_dump_src_param(const struct sm1_src_param * param)279 sm1_dump_src_param(const struct sm1_src_param *param)
280 {
281     if (param->file == NINED3DSPR_IMMEDIATE) {
282         assert(!param->mod &&
283                !param->rel &&
284                param->swizzle == NINED3DSP_NOSWIZZLE);
285         sm1_dump_immediate(param);
286         return;
287     }
288 
289     if (param->mod)
290         DUMP("%s(", sm1_mod_str[param->mod]);
291     if (param->rel) {
292         DUMP("%c[", sm1_file_char[param->file]);
293         sm1_dump_src_param(param->rel);
294         DUMP("+%i]", param->idx);
295     } else {
296         sm1_dump_reg(param->file, param->idx);
297     }
298     if (param->mod)
299        DUMP(")");
300     if (param->swizzle != NINED3DSP_NOSWIZZLE) {
301        DUMP(".");
302        sm1_dump_swizzle(param->swizzle);
303     }
304 }
305 
306 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)307 sm1_dump_dst_param(const struct sm1_dst_param *param)
308 {
309    if (param->mod & NINED3DSPDM_SATURATE)
310       DUMP("sat ");
311    if (param->mod & NINED3DSPDM_PARTIALP)
312       DUMP("pp ");
313    if (param->mod & NINED3DSPDM_CENTROID)
314       DUMP("centroid ");
315    if (param->shift < 0)
316       DUMP("/%u ", 1 << -param->shift);
317    if (param->shift > 0)
318       DUMP("*%u ", 1 << param->shift);
319 
320    if (param->rel) {
321       DUMP("%c[", sm1_file_char[param->file]);
322       sm1_dump_src_param(param->rel);
323       DUMP("+%i]", param->idx);
324    } else {
325       sm1_dump_reg(param->file, param->idx);
326    }
327    if (param->mask != NINED3DSP_WRITEMASK_ALL) {
328       DUMP(".");
329       sm1_dump_writemask(param->mask);
330    }
331 }
332 
/* Semantic declaration filled in by sm1_read_semantic(): the declared
 * register plus its usage information. */
struct sm1_semantic
{
   struct sm1_dst_param reg; /* declared register */
   BYTE sampler_type; /* NOTE(review): presumably only meaningful for sampler declarations -- confirm */
   D3DDECLUSAGE usage;
   BYTE usage_idx;
};
340 
/* Static description of one SM1 opcode. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, the opcode
     * member should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
361 
/* One decoded SM1 instruction together with its parameters. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags; /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* for TEX */
    BOOL coissue; /* dumped as the "_co" suffix */
    BOOL predicated; /* when set, pred holds the predicate operand */
    BYTE ndst; /* number of valid entries in dst[] */
    BYTE nsrc; /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* NOTE(review): presumably backing storage for src[i].rel -- confirm */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    /* NOTE(review): presumably backing storage for dst[0].rel -- confirm */
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    const struct sm1_op_info *info;
};
378 
379 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)380 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
381 {
382     unsigned i;
383 
384     /* no info stored for these: */
385     if (insn->opcode == D3DSIO_DCL)
386         return;
387     for (i = 0; i < indent; ++i)
388         DUMP("  ");
389 
390     if (insn->predicated) {
391         DUMP("@");
392         sm1_dump_src_param(&insn->pred);
393         DUMP(" ");
394     }
395     DUMP("%s", d3dsio_to_string(insn->opcode));
396     if (insn->flags) {
397         switch (insn->opcode) {
398         case D3DSIO_TEX:
399             DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
400             break;
401         default:
402             DUMP("_%x", insn->flags);
403             break;
404         }
405     }
406     if (insn->coissue)
407         DUMP("_co");
408     DUMP(" ");
409 
410     for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
411         sm1_dump_dst_param(&insn->dst[i]);
412         DUMP(" ");
413     }
414 
415     for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
416         sm1_dump_src_param(&insn->src[i]);
417         DUMP(" ");
418     }
419     if (insn->opcode == D3DSIO_DEF ||
420         insn->opcode == D3DSIO_DEFI ||
421         insn->opcode == D3DSIO_DEFB)
422         sm1_dump_immediate(&insn->src[0]);
423 
424     DUMP("\n");
425 }
426 
/* A constant defined inside the shader (see tx_set_lconstf/i/b). */
struct sm1_local_const
{
    INT idx; /* constant register index it stands for */
    struct ureg_src reg; /* TGSI immediate holding the value */
    float f[4]; /* for indirect addressing of float constants */
};
433 
/* Complete state of one SM1 -> TGSI shader translation. */
struct shader_translator
{
    /* NOTE(review): presumably start of the token stream, current token and
     * start of the next instruction -- confirm against the parser */
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg; /* TGSI program being emitted */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT, see IS_VS / IS_PS */
    /* exclusive upper bounds checked by tx_lconst* and tx_set_lconst* */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers; /* selects integer vs float immediates in tx_set_lconsti/b */
    boolean inline_subroutines;
    boolean want_texcoord;
    boolean shift_wpos;
    boolean wpos_is_sysval; /* position read as system value instead of fs input */
    boolean face_is_sysval_integer;
    boolean mul_zero_wins;
    unsigned texcoord_sn; /* semantic name used by tx_texcoord_alloc() */

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r; /* heap array of num_temp entries, see tx_temp_alloc() */
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address; /* declared by tx_addr_alloc() */
        struct ureg_dst a0; /* declared by tx_addr_alloc() */
        struct ureg_dst predicate;
        struct ureg_dst predicate_tmp;
        struct ureg_dst predicate_dst;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[8]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* number of entries allocated in regs.r */
    unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch() */
    unsigned loop_depth; /* current loop nesting level */
    unsigned loop_depth_max; /* highest loop_depth reached so far */
    unsigned cond_depth; /* current conditional nesting level */
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
    boolean predicated_activated;

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local constants, grown in steps of 8 by tx_set_lconst* */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    /* constant slot bookkeeping maintained by nine_*_constant_src() */
    boolean slots_used[NINE_MAX_CONST_ALL];
    unsigned *slot_map; /* optional remapping of slot indices, may be NULL */
    unsigned num_slots; /* highest used slot index + 1 */

    boolean indirect_const_access;
    boolean failure; /* set once the translation has hit an error */

    /* appended to by nine_record_outputs() */
    struct nine_vs_output_info output_info[16];
    int num_outputs;

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
526 
/* Shader stage tests; both expect a local variable named tx. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* If cond holds, flag the translation as failed and return from the enclosing
 * void function. Expands to a bare if statement (not do/while) and is used
 * without a trailing semicolon, so do not place it directly before an else. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Defined later in the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
534 
535 static void
sm1_instruction_check(const struct sm1_instruction * insn)536 sm1_instruction_check(const struct sm1_instruction *insn)
537 {
538     if (insn->opcode == D3DSIO_CRS)
539     {
540         if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
541         {
542             DBG("CRS.mask.w\n");
543         }
544     }
545 }
546 
547 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)548 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
549                     int mask, int output_index)
550 {
551     tx->output_info[tx->num_outputs].output_semantic = Usage;
552     tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
553     tx->output_info[tx->num_outputs].mask = mask;
554     tx->output_info[tx->num_outputs].output_index = output_index;
555     tx->num_outputs++;
556 }
557 
nine_float_constant_src(struct shader_translator * tx,int idx)558 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
559 {
560     struct ureg_src src;
561 
562     if (tx->slot_map)
563         idx = tx->slot_map[idx];
564     /* vswp constant handling: we use two buffers
565      * to fit all the float constants. The special handling
566      * doesn't need to be elsewhere, because all the instructions
567      * accessing the constants directly are VS1, and swvp
568      * is VS >= 2 */
569     if (tx->info->swvp_on && idx >= 4096) {
570         /* TODO: swvp rel is broken if many constants are used */
571         src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
572         src = ureg_src_dimension(src, 1);
573     } else {
574         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
575         src = ureg_src_dimension(src, 0);
576     }
577 
578     if (!tx->info->swvp_on)
579         tx->slots_used[idx] = TRUE;
580     if (tx->info->const_float_slots < (idx + 1))
581         tx->info->const_float_slots = idx + 1;
582     if (tx->num_slots < (idx + 1))
583         tx->num_slots = idx + 1;
584 
585     return src;
586 }
587 
nine_integer_constant_src(struct shader_translator * tx,int idx)588 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
589 {
590     struct ureg_src src;
591 
592     if (tx->info->swvp_on) {
593         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
594         src = ureg_src_dimension(src, 2);
595     } else {
596         unsigned slot_idx = tx->info->const_i_base + idx;
597         if (tx->slot_map)
598             slot_idx = tx->slot_map[slot_idx];
599         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
600         src = ureg_src_dimension(src, 0);
601         tx->slots_used[slot_idx] = TRUE;
602         tx->info->int_slots_used[idx] = TRUE;
603         if (tx->num_slots < (slot_idx + 1))
604             tx->num_slots = slot_idx + 1;
605     }
606 
607     if (tx->info->const_int_slots < (idx + 1))
608         tx->info->const_int_slots = idx + 1;
609 
610     return src;
611 }
612 
nine_boolean_constant_src(struct shader_translator * tx,int idx)613 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
614 {
615     struct ureg_src src;
616 
617     char r = idx / 4;
618     char s = idx & 3;
619 
620     if (tx->info->swvp_on) {
621         src = ureg_src_register(TGSI_FILE_CONSTANT, r);
622         src = ureg_src_dimension(src, 3);
623     } else {
624         unsigned slot_idx = tx->info->const_b_base + r;
625         if (tx->slot_map)
626             slot_idx = tx->slot_map[slot_idx];
627         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
628         src = ureg_src_dimension(src, 0);
629         tx->slots_used[slot_idx] = TRUE;
630         tx->info->bool_slots_used[idx] = TRUE;
631         if (tx->num_slots < (slot_idx + 1))
632             tx->num_slots = slot_idx + 1;
633     }
634     src = ureg_swizzle(src, s, s, s, s);
635 
636     if (tx->info->const_bool_slots < (idx + 1))
637         tx->info->const_bool_slots = idx + 1;
638 
639     return src;
640 }
641 
642 static boolean
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)643 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
644 {
645    INT i;
646 
647    if (index < 0 || index >= tx->num_constf_allowed) {
648        tx->failure = TRUE;
649        return FALSE;
650    }
651    for (i = 0; i < tx->num_lconstf; ++i) {
652       if (tx->lconstf[i].idx == index) {
653          *src = tx->lconstf[i].reg;
654          return TRUE;
655       }
656    }
657    return FALSE;
658 }
659 static boolean
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)660 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
661 {
662    int i;
663 
664    if (index < 0 || index >= tx->num_consti_allowed) {
665        tx->failure = TRUE;
666        return FALSE;
667    }
668    for (i = 0; i < tx->num_lconsti; ++i) {
669       if (tx->lconsti[i].idx == index) {
670          *src = tx->lconsti[i].reg;
671          return TRUE;
672       }
673    }
674    return FALSE;
675 }
676 static boolean
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)677 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
678 {
679    int i;
680 
681    if (index < 0 || index >= tx->num_constb_allowed) {
682        tx->failure = TRUE;
683        return FALSE;
684    }
685    for (i = 0; i < tx->num_lconstb; ++i) {
686       if (tx->lconstb[i].idx == index) {
687          *src = tx->lconstb[i].reg;
688          return TRUE;
689       }
690    }
691    return FALSE;
692 }
693 
694 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])695 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
696 {
697     unsigned n;
698 
699     FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
700 
701     for (n = 0; n < tx->num_lconstf; ++n)
702         if (tx->lconstf[n].idx == index)
703             break;
704     if (n == tx->num_lconstf) {
705        if ((n % 8) == 0) {
706           tx->lconstf = REALLOC(tx->lconstf,
707                                 (n + 0) * sizeof(tx->lconstf[0]),
708                                 (n + 8) * sizeof(tx->lconstf[0]));
709           assert(tx->lconstf);
710        }
711        tx->num_lconstf++;
712     }
713     tx->lconstf[n].idx = index;
714     tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
715 
716     memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
717 }
718 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])719 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
720 {
721     unsigned n;
722 
723     FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
724 
725     for (n = 0; n < tx->num_lconsti; ++n)
726         if (tx->lconsti[n].idx == index)
727             break;
728     if (n == tx->num_lconsti) {
729        if ((n % 8) == 0) {
730           tx->lconsti = REALLOC(tx->lconsti,
731                                 (n + 0) * sizeof(tx->lconsti[0]),
732                                 (n + 8) * sizeof(tx->lconsti[0]));
733           assert(tx->lconsti);
734        }
735        tx->num_lconsti++;
736     }
737 
738     tx->lconsti[n].idx = index;
739     tx->lconsti[n].reg = tx->native_integers ?
740        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
741        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
742 }
743 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)744 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
745 {
746     unsigned n;
747 
748     FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
749 
750     for (n = 0; n < tx->num_lconstb; ++n)
751         if (tx->lconstb[n].idx == index)
752             break;
753     if (n == tx->num_lconstb) {
754        if ((n % 8) == 0) {
755           tx->lconstb = REALLOC(tx->lconstb,
756                                 (n + 0) * sizeof(tx->lconstb[0]),
757                                 (n + 8) * sizeof(tx->lconstb[0]));
758           assert(tx->lconstb);
759        }
760        tx->num_lconstb++;
761     }
762 
763     tx->lconstb[n].idx = index;
764     tx->lconstb[n].reg = tx->native_integers ?
765        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
766        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
767 }
768 
769 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)770 tx_scratch(struct shader_translator *tx)
771 {
772     if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
773         tx->failure = TRUE;
774         return tx->regs.t[0];
775     }
776     if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
777         tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
778     return tx->regs.t[tx->num_scratch++];
779 }
780 
781 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)782 tx_scratch_scalar(struct shader_translator *tx)
783 {
784     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
785 }
786 
787 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)788 tx_src_scalar(struct ureg_dst dst)
789 {
790     struct ureg_src src = ureg_src(dst);
791     int c = ffs(dst.WriteMask) - 1;
792     if (dst.WriteMask == (1 << c))
793         src = ureg_scalar(src, c);
794     return src;
795 }
796 
797 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)798 tx_temp_alloc(struct shader_translator *tx, INT idx)
799 {
800     assert(idx >= 0);
801     if (idx >= tx->num_temp) {
802        unsigned k = tx->num_temp;
803        unsigned n = idx + 1;
804        tx->regs.r = REALLOC(tx->regs.r,
805                             k * sizeof(tx->regs.r[0]),
806                             n * sizeof(tx->regs.r[0]));
807        for (; k < n; ++k)
808           tx->regs.r[k] = ureg_dst_undef();
809        tx->num_temp = n;
810     }
811     if (ureg_dst_is_undef(tx->regs.r[idx]))
812         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
813 }
814 
815 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)816 tx_addr_alloc(struct shader_translator *tx, INT idx)
817 {
818     assert(idx == 0);
819     if (ureg_dst_is_undef(tx->regs.address))
820         tx->regs.address = ureg_DECL_address(tx->ureg);
821     if (ureg_dst_is_undef(tx->regs.a0))
822         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
823 }
824 
825 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
826  * the projection should be applied on the texture. It doesn't
827  * apply on texkill.
828  * The doc is very imprecise here (it says the projection is done
829  * before rasterization, thus in vs, which seems wrong since ps instructions
830  * are affected differently)
831  * For now we only apply to the ps TEX instruction and TEXBEM.
832  * Perhaps some other instructions would need it */
833 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)834 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
835                       struct ureg_src src, INT idx)
836 {
837     struct ureg_dst tmp;
838     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
839 
840     /* no projection */
841     if (dim == 1) {
842         ureg_MOV(tx->ureg, dst, src);
843     } else {
844         tmp = tx_scratch_scalar(tx);
845         ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
846         ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
847     }
848 }
849 
850 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)851 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
852                          unsigned target, struct ureg_src src0,
853                          struct ureg_src src1, INT idx)
854 {
855     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
856     struct ureg_dst tmp;
857     boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
858 
859     /* dim == 1: no projection
860      * Looks like must be disabled when it makes no
861      * sense according the texture dimensions
862      */
863     if (dim == 1 || (dim <= target && !shadow)) {
864         ureg_TEX(tx->ureg, dst, target, src0, src1);
865     } else if (dim == 4) {
866         ureg_TXP(tx->ureg, dst, target, src0, src1);
867     } else {
868         tmp = tx_scratch(tx);
869         apply_ps1x_projection(tx, tmp, src0, idx);
870         ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
871     }
872 }
873 
874 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)875 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
876 {
877     assert(IS_PS);
878     assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
879     if (ureg_src_is_undef(tx->regs.vT[idx]))
880        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
881                                              TGSI_INTERPOLATE_PERSPECTIVE);
882 }
883 
884 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)885 tx_bgnloop(struct shader_translator *tx)
886 {
887     tx->loop_depth++;
888     if (tx->loop_depth_max < tx->loop_depth)
889         tx->loop_depth_max = tx->loop_depth;
890     assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
891     return &tx->loop_labels[tx->loop_depth - 1];
892 }
893 
894 static inline unsigned *
tx_endloop(struct shader_translator * tx)895 tx_endloop(struct shader_translator *tx)
896 {
897     assert(tx->loop_depth);
898     tx->loop_depth--;
899     ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
900                      ureg_get_instruction_number(tx->ureg));
901     return &tx->loop_labels[tx->loop_depth];
902 }
903 
904 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,boolean loop_or_rep)905 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
906 {
907     const unsigned l = tx->loop_depth - 1;
908 
909     if (!tx->loop_depth)
910     {
911         DBG("loop counter requested outside of loop\n");
912         return ureg_dst_undef();
913     }
914 
915     if (ureg_dst_is_undef(tx->regs.rL[l])) {
916         /* loop or rep ctr creation */
917         tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
918         tx->loop_or_rep[l] = loop_or_rep;
919     }
920     /* loop - rep - endloop - endrep not allowed */
921     assert(tx->loop_or_rep[l] == loop_or_rep);
922 
923     return tx->regs.rL[l];
924 }
925 
926 static struct ureg_src
tx_get_loopal(struct shader_translator * tx)927 tx_get_loopal(struct shader_translator *tx)
928 {
929     int loop_level = tx->loop_depth - 1;
930 
931     while (loop_level >= 0) {
932         /* handle loop - rep - endrep - endloop case */
933         if (tx->loop_or_rep[loop_level])
934             /* the value is in the loop counter y component (nine implementation) */
935             return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
936         loop_level--;
937     }
938 
939     DBG("aL counter requested outside of loop\n");
940     return ureg_src_undef();
941 }
942 
943 static inline unsigned *
tx_cond(struct shader_translator * tx)944 tx_cond(struct shader_translator *tx)
945 {
946    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
947    tx->cond_depth++;
948    return &tx->cond_labels[tx->cond_depth - 1];
949 }
950 
951 static inline unsigned *
tx_elsecond(struct shader_translator * tx)952 tx_elsecond(struct shader_translator *tx)
953 {
954    assert(tx->cond_depth);
955    return &tx->cond_labels[tx->cond_depth - 1];
956 }
957 
958 static inline void
tx_endcond(struct shader_translator * tx)959 tx_endcond(struct shader_translator *tx)
960 {
961    assert(tx->cond_depth);
962    tx->cond_depth--;
963    ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
964                     ureg_get_instruction_number(tx->ureg));
965 }
966 
967 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)968 nine_ureg_dst_register(unsigned file, int index)
969 {
970     return ureg_dst(ureg_src_register(file, index));
971 }
972 
973 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)974 nine_get_position_input(struct shader_translator *tx)
975 {
976     struct ureg_program *ureg = tx->ureg;
977 
978     if (tx->wpos_is_sysval)
979         return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
980     else
981         return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
982                                   0, TGSI_INTERPOLATE_LINEAR);
983 }
984 
985 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)986 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
987 {
988     struct ureg_program *ureg = tx->ureg;
989     struct ureg_src src;
990     struct ureg_dst tmp;
991 
992     assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
993         (D3DSPR_ADDR && tx->version.major == 3));
994 
995     switch (param->file)
996     {
997     case D3DSPR_TEMP:
998         tx_temp_alloc(tx, param->idx);
999         src = ureg_src(tx->regs.r[param->idx]);
1000         break;
1001  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1002     case D3DSPR_ADDR:
1003         if (IS_VS) {
1004             assert(param->idx == 0);
1005             /* the address register (vs only) must be
1006              * assigned before use */
1007             assert(!ureg_dst_is_undef(tx->regs.a0));
1008             /* Round to lowest for vs1.1 (contrary to the doc), else
1009              * round to nearest */
1010             if (tx->version.major < 2 && tx->version.minor < 2)
1011                 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1012             else
1013                 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1014             src = ureg_src(tx->regs.address);
1015         } else {
1016             if (tx->version.major < 2 && tx->version.minor < 4) {
1017                 /* no subroutines, so should be defined */
1018                 src = ureg_src(tx->regs.tS[param->idx]);
1019             } else {
1020                 tx_texcoord_alloc(tx, param->idx);
1021                 src = tx->regs.vT[param->idx];
1022             }
1023         }
1024         break;
1025     case D3DSPR_INPUT:
1026         if (IS_VS) {
1027             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1028         } else {
1029             if (tx->version.major < 3) {
1030                 src = ureg_DECL_fs_input_cyl_centroid(
1031                     ureg, TGSI_SEMANTIC_COLOR, param->idx,
1032                     TGSI_INTERPOLATE_COLOR, 0,
1033                     tx->info->force_color_in_centroid ?
1034                       TGSI_INTERPOLATE_LOC_CENTROID : 0,
1035                     0, 1);
1036             } else {
1037                 if(param->rel) {
1038                     /* Copy all inputs (non consecutive)
1039                      * to temp array (consecutive).
1040                      * This is not good for performance.
1041                      * A better way would be to have inputs
1042                      * consecutive (would need implement alternative
1043                      * way to match vs outputs and ps inputs).
1044                      * However even with the better way, the temp array
1045                      * copy would need to be used if some inputs
1046                      * are not GENERIC or if they have different
1047                      * interpolation flag. */
1048                     if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1049                         int i;
1050                         tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1051                         for (i = 0; i < 10; i++) {
1052                             if (!ureg_src_is_undef(tx->regs.v[i]))
1053                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1054                             else
1055                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1056                         }
1057                     }
1058                     src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1059                 } else {
1060                     assert(param->idx < ARRAY_SIZE(tx->regs.v));
1061                     src = tx->regs.v[param->idx];
1062                 }
1063             }
1064         }
1065         if (param->rel)
1066             src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1067         break;
1068     case D3DSPR_PREDICATE:
1069         if (ureg_dst_is_undef(tx->regs.predicate)) {
1070             /* Forbidden to use the predicate register before being set */
1071             tx->failure = TRUE;
1072             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1073         }
1074         src = ureg_src(tx->regs.predicate);
1075         break;
1076     case D3DSPR_SAMPLER:
1077         assert(param->mod == NINED3DSPSM_NONE);
1078         assert(param->swizzle == NINED3DSP_NOSWIZZLE);
1079         src = ureg_DECL_sampler(ureg, param->idx);
1080         break;
1081     case D3DSPR_CONST:
1082         if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1083             src = nine_float_constant_src(tx, param->idx);
1084             if (param->rel) {
1085                 tx->indirect_const_access = TRUE;
1086                 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1087             }
1088         }
1089         if (!IS_VS && tx->version.major < 2) {
1090             /* ps 1.X clamps constants */
1091             tmp = tx_scratch(tx);
1092             ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1093             ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1094             src = ureg_src(tmp);
1095         }
1096         break;
1097     case D3DSPR_CONST2:
1098     case D3DSPR_CONST3:
1099     case D3DSPR_CONST4:
1100         DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1101         assert(!"CONST2/3/4");
1102         src = ureg_imm1f(ureg, 0.0f);
1103         break;
1104     case D3DSPR_CONSTINT:
1105         /* relative adressing only possible for float constants in vs */
1106         if (!tx_lconsti(tx, &src, param->idx))
1107             src = nine_integer_constant_src(tx, param->idx);
1108         break;
1109     case D3DSPR_CONSTBOOL:
1110         if (!tx_lconstb(tx, &src, param->idx))
1111             src = nine_boolean_constant_src(tx, param->idx);
1112         break;
1113     case D3DSPR_LOOP:
1114         if (ureg_dst_is_undef(tx->regs.address))
1115             tx->regs.address = ureg_DECL_address(ureg);
1116         if (!tx->native_integers)
1117             ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1118         else
1119             ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1120         src = ureg_src(tx->regs.address);
1121         break;
1122     case D3DSPR_MISCTYPE:
1123         switch (param->idx) {
1124         case D3DSMO_POSITION:
1125            if (ureg_src_is_undef(tx->regs.vPos))
1126               tx->regs.vPos = nine_get_position_input(tx);
1127            if (tx->shift_wpos) {
1128                /* TODO: do this only once */
1129                struct ureg_dst wpos = tx_scratch(tx);
1130                ureg_ADD(ureg, wpos, tx->regs.vPos,
1131                         ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1132                src = ureg_src(wpos);
1133            } else {
1134                src = tx->regs.vPos;
1135            }
1136            break;
1137         case D3DSMO_FACE:
1138            if (ureg_src_is_undef(tx->regs.vFace)) {
1139                if (tx->face_is_sysval_integer) {
1140                    tmp = ureg_DECL_temporary(ureg);
1141                    tx->regs.vFace =
1142                        ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1143 
1144                    /* convert bool to float */
1145                    ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1146                              ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1147                    tx->regs.vFace = ureg_src(tmp);
1148                } else {
1149                    tx->regs.vFace = ureg_DECL_fs_input(ureg,
1150                                                        TGSI_SEMANTIC_FACE, 0,
1151                                                        TGSI_INTERPOLATE_CONSTANT);
1152                }
1153                tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1154            }
1155            src = tx->regs.vFace;
1156            break;
1157         default:
1158             assert(!"invalid src D3DSMO");
1159             break;
1160         }
1161         break;
1162     case D3DSPR_TEMPFLOAT16:
1163         break;
1164     default:
1165         assert(!"invalid src D3DSPR");
1166     }
1167 
1168     switch (param->mod) {
1169     case NINED3DSPSM_DW:
1170         tmp = tx_scratch(tx);
1171         /* NOTE: app is not allowed to read w with this modifier */
1172         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1173         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1174         src = ureg_src(tmp);
1175         break;
1176     case NINED3DSPSM_DZ:
1177         tmp = tx_scratch(tx);
1178         /* NOTE: app is not allowed to read z with this modifier */
1179         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1180         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1181         src = ureg_src(tmp);
1182         break;
1183     default:
1184         break;
1185     }
1186 
1187     if (param->swizzle != NINED3DSP_NOSWIZZLE)
1188         src = ureg_swizzle(src,
1189                            (param->swizzle >> 0) & 0x3,
1190                            (param->swizzle >> 2) & 0x3,
1191                            (param->swizzle >> 4) & 0x3,
1192                            (param->swizzle >> 6) & 0x3);
1193 
1194     switch (param->mod) {
1195     case NINED3DSPSM_ABS:
1196         src = ureg_abs(src);
1197         break;
1198     case NINED3DSPSM_ABSNEG:
1199         src = ureg_negate(ureg_abs(src));
1200         break;
1201     case NINED3DSPSM_NEG:
1202         src = ureg_negate(src);
1203         break;
1204     case NINED3DSPSM_BIAS:
1205         tmp = tx_scratch(tx);
1206         ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1207         src = ureg_src(tmp);
1208         break;
1209     case NINED3DSPSM_BIASNEG:
1210         tmp = tx_scratch(tx);
1211         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1212         src = ureg_src(tmp);
1213         break;
1214     case NINED3DSPSM_NOT:
1215         if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1216             tmp = tx_scratch(tx);
1217             ureg_NOT(ureg, tmp, src);
1218             src = ureg_src(tmp);
1219             break;
1220         } else { /* predicate */
1221             tmp = tx_scratch(tx);
1222             ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1223             src = ureg_src(tmp);
1224         }
1225         /* fall through */
1226     case NINED3DSPSM_COMP:
1227         tmp = tx_scratch(tx);
1228         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1229         src = ureg_src(tmp);
1230         break;
1231     case NINED3DSPSM_DZ:
1232     case NINED3DSPSM_DW:
1233         /* Already handled*/
1234         break;
1235     case NINED3DSPSM_SIGN:
1236         tmp = tx_scratch(tx);
1237         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1238         src = ureg_src(tmp);
1239         break;
1240     case NINED3DSPSM_SIGNNEG:
1241         tmp = tx_scratch(tx);
1242         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1243         src = ureg_src(tmp);
1244         break;
1245     case NINED3DSPSM_X2:
1246         tmp = tx_scratch(tx);
1247         ureg_ADD(ureg, tmp, src, src);
1248         src = ureg_src(tmp);
1249         break;
1250     case NINED3DSPSM_X2NEG:
1251         tmp = tx_scratch(tx);
1252         ureg_ADD(ureg, tmp, src, src);
1253         src = ureg_negate(ureg_src(tmp));
1254         break;
1255     default:
1256         assert(param->mod == NINED3DSPSM_NONE);
1257         break;
1258     }
1259 
1260     return src;
1261 }
1262 
1263 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1264 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1265 {
1266     struct ureg_dst dst;
1267 
1268     switch (param->file)
1269     {
1270     case D3DSPR_TEMP:
1271         assert(!param->rel);
1272         tx_temp_alloc(tx, param->idx);
1273         dst = tx->regs.r[param->idx];
1274         break;
1275  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1276     case D3DSPR_ADDR:
1277         assert(!param->rel);
1278         if (tx->version.major < 2 && !IS_VS) {
1279             if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1280                 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1281             dst = tx->regs.tS[param->idx];
1282         } else
1283         if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1284             tx_texcoord_alloc(tx, param->idx);
1285             dst = ureg_dst(tx->regs.vT[param->idx]);
1286         } else {
1287             tx_addr_alloc(tx, param->idx);
1288             dst = tx->regs.a0;
1289         }
1290         break;
1291     case D3DSPR_RASTOUT:
1292         assert(!param->rel);
1293         switch (param->idx) {
1294         case 0:
1295             if (ureg_dst_is_undef(tx->regs.oPos))
1296                 tx->regs.oPos =
1297                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1298             dst = tx->regs.oPos;
1299             break;
1300         case 1:
1301             if (ureg_dst_is_undef(tx->regs.oFog))
1302                 tx->regs.oFog =
1303                     ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1304             dst = tx->regs.oFog;
1305             break;
1306         case 2:
1307             if (ureg_dst_is_undef(tx->regs.oPts))
1308                 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1309             dst = tx->regs.oPts;
1310             break;
1311         default:
1312             assert(0);
1313             break;
1314         }
1315         break;
1316  /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1317     case D3DSPR_OUTPUT:
1318         if (tx->version.major < 3) {
1319             assert(!param->rel);
1320             dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1321         } else {
1322             assert(!param->rel); /* TODO */
1323             assert(param->idx < ARRAY_SIZE(tx->regs.o));
1324             dst = tx->regs.o[param->idx];
1325         }
1326         break;
1327     case D3DSPR_ATTROUT: /* VS */
1328     case D3DSPR_COLOROUT: /* PS */
1329         assert(param->idx >= 0 && param->idx < 4);
1330         assert(!param->rel);
1331         tx->info->rt_mask |= 1 << param->idx;
1332         if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1333             /* ps < 3: oCol[0] will have fog blending afterward */
1334             if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1335                 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1336             } else {
1337                 tx->regs.oCol[param->idx] =
1338                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1339             }
1340         }
1341         dst = tx->regs.oCol[param->idx];
1342         if (IS_VS && tx->version.major < 3)
1343             dst = ureg_saturate(dst);
1344         break;
1345     case D3DSPR_DEPTHOUT:
1346         assert(!param->rel);
1347         if (ureg_dst_is_undef(tx->regs.oDepth))
1348            tx->regs.oDepth =
1349               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1350                                       TGSI_WRITEMASK_Z, 0, 1);
1351         dst = tx->regs.oDepth; /* XXX: must write .z component */
1352         break;
1353     case D3DSPR_PREDICATE:
1354         if (ureg_dst_is_undef(tx->regs.predicate))
1355             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1356         dst = tx->regs.predicate;
1357         break;
1358     case D3DSPR_TEMPFLOAT16:
1359         DBG("unhandled D3DSPR: %u\n", param->file);
1360         break;
1361     default:
1362         assert(!"invalid dst D3DSPR");
1363         break;
1364     }
1365     if (param->rel)
1366         dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1367 
1368     if (param->mask != NINED3DSP_WRITEMASK_ALL)
1369         dst = ureg_writemask(dst, param->mask);
1370     if (param->mod & NINED3DSPDM_SATURATE)
1371         dst = ureg_saturate(dst);
1372 
1373     if (tx->predicated_activated) {
1374         tx->regs.predicate_dst = dst;
1375         dst = tx->regs.predicate_tmp;
1376     }
1377 
1378     return dst;
1379 }
1380 
1381 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1382 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1383 {
1384     if (param->shift) {
1385         tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1386         return tx->regs.tdst;
1387     }
1388     return _tx_dst_param(tx, param);
1389 }
1390 
1391 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1392 tx_apply_dst0_modifiers(struct shader_translator *tx)
1393 {
1394     struct ureg_dst rdst;
1395     float f;
1396 
1397     if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1398         return;
1399     rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1400 
1401     assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1402 
1403     if (tx->insn.dst[0].shift < 0)
1404         f = 1.0f / (1 << -tx->insn.dst[0].shift);
1405     else
1406         f = 1 << tx->insn.dst[0].shift;
1407 
1408     ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1409 }
1410 
1411 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1412 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1413 {
1414     struct ureg_src src;
1415 
1416     assert(!param->shift);
1417     assert(!(param->mod & NINED3DSPDM_SATURATE));
1418 
1419     switch (param->file) {
1420     case D3DSPR_INPUT:
1421         if (IS_VS) {
1422             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1423         } else {
1424             assert(!param->rel);
1425             assert(param->idx < ARRAY_SIZE(tx->regs.v));
1426             src = tx->regs.v[param->idx];
1427         }
1428         break;
1429     default:
1430         src = ureg_src(tx_dst_param(tx, param));
1431         break;
1432     }
1433     if (param->rel)
1434         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1435 
1436     if (!param->mask)
1437         WARN("mask is 0, using identity swizzle\n");
1438 
1439     if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1440         char s[4];
1441         int n;
1442         int c;
1443         for (n = 0, c = 0; c < 4; ++c)
1444             if (param->mask & (1 << c))
1445                 s[n++] = c;
1446         assert(n);
1447         for (c = n; c < 4; ++c)
1448             s[c] = s[n - 1];
1449         src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1450     }
1451     return src;
1452 }
1453 
1454 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1455 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1456 {
1457     struct ureg_program *ureg = tx->ureg;
1458     struct ureg_dst dst;
1459     struct ureg_src src[2];
1460     struct sm1_src_param *src_mat = &tx->insn.src[1];
1461     unsigned i;
1462 
1463     dst = tx_dst_param(tx, &tx->insn.dst[0]);
1464     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1465 
1466     for (i = 0; i < n; i++)
1467     {
1468         const unsigned m = (1 << i);
1469 
1470         src[1] = tx_src_param(tx, src_mat);
1471         src_mat->idx++;
1472 
1473         if (!(dst.WriteMask & m))
1474             continue;
1475 
1476         /* XXX: src == dst case ? */
1477 
1478         switch (k) {
1479         case 3:
1480             ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1481             break;
1482         case 4:
1483             ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1484             break;
1485         default:
1486             DBG("invalid operation: M%ux%u\n", m, n);
1487             break;
1488         }
1489     }
1490 
1491     return D3D_OK;
1492 }
1493 
/* Expands to two zero values separated by a comma.
 * NOTE(review): presumably fills a { min, max } version pair for an
 * instruction that is not supported in a shader stage - confirm against
 * the instruction table that uses it. */
#define VNOTSUPPORTED   0, 0
/* Pack a version number into one integer: major in bits 8 and up, minor in
 * the low bits. */
#define V(maj, min)     (((maj) << 8) | (min))
1496 
1497 static inline const char *
d3dsio_to_string(unsigned opcode)1498 d3dsio_to_string( unsigned opcode )
1499 {
1500     static const char *names[] = {
1501         "NOP",
1502         "MOV",
1503         "ADD",
1504         "SUB",
1505         "MAD",
1506         "MUL",
1507         "RCP",
1508         "RSQ",
1509         "DP3",
1510         "DP4",
1511         "MIN",
1512         "MAX",
1513         "SLT",
1514         "SGE",
1515         "EXP",
1516         "LOG",
1517         "LIT",
1518         "DST",
1519         "LRP",
1520         "FRC",
1521         "M4x4",
1522         "M4x3",
1523         "M3x4",
1524         "M3x3",
1525         "M3x2",
1526         "CALL",
1527         "CALLNZ",
1528         "LOOP",
1529         "RET",
1530         "ENDLOOP",
1531         "LABEL",
1532         "DCL",
1533         "POW",
1534         "CRS",
1535         "SGN",
1536         "ABS",
1537         "NRM",
1538         "SINCOS",
1539         "REP",
1540         "ENDREP",
1541         "IF",
1542         "IFC",
1543         "ELSE",
1544         "ENDIF",
1545         "BREAK",
1546         "BREAKC",
1547         "MOVA",
1548         "DEFB",
1549         "DEFI",
1550         NULL,
1551         NULL,
1552         NULL,
1553         NULL,
1554         NULL,
1555         NULL,
1556         NULL,
1557         NULL,
1558         NULL,
1559         NULL,
1560         NULL,
1561         NULL,
1562         NULL,
1563         NULL,
1564         NULL,
1565         "TEXCOORD",
1566         "TEXKILL",
1567         "TEX",
1568         "TEXBEM",
1569         "TEXBEML",
1570         "TEXREG2AR",
1571         "TEXREG2GB",
1572         "TEXM3x2PAD",
1573         "TEXM3x2TEX",
1574         "TEXM3x3PAD",
1575         "TEXM3x3TEX",
1576         NULL,
1577         "TEXM3x3SPEC",
1578         "TEXM3x3VSPEC",
1579         "EXPP",
1580         "LOGP",
1581         "CND",
1582         "DEF",
1583         "TEXREG2RGB",
1584         "TEXDP3TEX",
1585         "TEXM3x2DEPTH",
1586         "TEXDP3",
1587         "TEXM3x3",
1588         "TEXDEPTH",
1589         "CMP",
1590         "BEM",
1591         "DP2ADD",
1592         "DSX",
1593         "DSY",
1594         "TEXLDD",
1595         "SETP",
1596         "TEXLDL",
1597         "BREAKP"
1598     };
1599 
1600     if (opcode < ARRAY_SIZE(names)) return names[opcode];
1601 
1602     switch (opcode) {
1603     case D3DSIO_PHASE: return "PHASE";
1604     case D3DSIO_COMMENT: return "COMMENT";
1605     case D3DSIO_END: return "END";
1606     default:
1607         return NULL;
1608     }
1609 }
1610 
/* All-zero initializer for an instruction description entry.
 * NOTE(review): the meaning of each field depends on the entry struct,
 * which is declared elsewhere - confirm the field order there. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the vertex/fragment version bounds of
 * `inst` is non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the translation handler for opcode `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the translation handler for opcode `name`; the
 * function body follows the macro and receives the translator as `tx`. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not in this part of the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1626 
DECL_SPECIAL(NOP)1627 DECL_SPECIAL(NOP)
1628 {
1629     /* Nothing to do. NOP was used to avoid hangs
1630      * with very old d3d drivers. */
1631     return D3D_OK;
1632 }
1633 
DECL_SPECIAL(SUB)1634 DECL_SPECIAL(SUB)
1635 {
1636     struct ureg_program *ureg = tx->ureg;
1637     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1638     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1639     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1640 
1641     ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1642     return D3D_OK;
1643 }
1644 
DECL_SPECIAL(ABS)1645 DECL_SPECIAL(ABS)
1646 {
1647     struct ureg_program *ureg = tx->ureg;
1648     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1649     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1650 
1651     ureg_MOV(ureg, dst, ureg_abs(src));
1652     return D3D_OK;
1653 }
1654 
DECL_SPECIAL(XPD)1655 DECL_SPECIAL(XPD)
1656 {
1657     struct ureg_program *ureg = tx->ureg;
1658     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1659     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1660     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1661 
1662     ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1663              ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1664                           TGSI_SWIZZLE_X, 0),
1665              ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1666                           TGSI_SWIZZLE_Y, 0));
1667     ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1668              ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1669                           TGSI_SWIZZLE_Y, 0),
1670              ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1671                                       TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1672              ureg_src(dst));
1673     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1674              ureg_imm1f(ureg, 1));
1675     return D3D_OK;
1676 }
1677 
DECL_SPECIAL(M4x4)1678 DECL_SPECIAL(M4x4)
1679 {
1680     return NineTranslateInstruction_Mkxn(tx, 4, 4);
1681 }
1682 
DECL_SPECIAL(M4x3)1683 DECL_SPECIAL(M4x3)
1684 {
1685     return NineTranslateInstruction_Mkxn(tx, 4, 3);
1686 }
1687 
DECL_SPECIAL(M3x4)1688 DECL_SPECIAL(M3x4)
1689 {
1690     return NineTranslateInstruction_Mkxn(tx, 3, 4);
1691 }
1692 
DECL_SPECIAL(M3x3)1693 DECL_SPECIAL(M3x3)
1694 {
1695     return NineTranslateInstruction_Mkxn(tx, 3, 3);
1696 }
1697 
DECL_SPECIAL(M3x2)1698 DECL_SPECIAL(M3x2)
1699 {
1700     return NineTranslateInstruction_Mkxn(tx, 3, 2);
1701 }
1702 
DECL_SPECIAL(CMP)1703 DECL_SPECIAL(CMP)
1704 {
1705     ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1706              tx_src_param(tx, &tx->insn.src[0]),
1707              tx_src_param(tx, &tx->insn.src[2]),
1708              tx_src_param(tx, &tx->insn.src[1]));
1709     return D3D_OK;
1710 }
1711 
DECL_SPECIAL(CND)1712 DECL_SPECIAL(CND)
1713 {
1714     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1715     struct ureg_dst cgt;
1716     struct ureg_src cnd;
1717 
1718     /* the coissue flag was a tip for compilers to advise to
1719      * execute two operations at the same time, in cases
1720      * the two executions had same dst with different channels.
1721      * It has no effect on current hw. However it seems CND
1722      * is affected. The handling of this very specific case
1723      * handled below mimick wine behaviour */
1724     if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1725         ureg_MOV(tx->ureg,
1726                  dst, tx_src_param(tx, &tx->insn.src[1]));
1727         return D3D_OK;
1728     }
1729 
1730     cnd = tx_src_param(tx, &tx->insn.src[0]);
1731     cgt = tx_scratch(tx);
1732 
1733     if (tx->version.major == 1 && tx->version.minor < 4)
1734         cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1735 
1736     ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1737 
1738     ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1739              tx_src_param(tx, &tx->insn.src[1]),
1740              tx_src_param(tx, &tx->insn.src[2]));
1741     return D3D_OK;
1742 }
1743 
DECL_SPECIAL(CALL)1744 DECL_SPECIAL(CALL)
1745 {
1746     assert(tx->insn.src[0].idx < tx->num_inst_labels);
1747     ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1748     return D3D_OK;
1749 }
1750 
DECL_SPECIAL(CALLNZ)1751 DECL_SPECIAL(CALLNZ)
1752 {
1753     struct ureg_program *ureg = tx->ureg;
1754     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1755 
1756     if (!tx->native_integers)
1757         ureg_IF(ureg, src, tx_cond(tx));
1758     else
1759         ureg_UIF(ureg, src, tx_cond(tx));
1760     ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1761     tx_endcond(tx);
1762     ureg_ENDIF(ureg);
1763     return D3D_OK;
1764 }
1765 
DECL_SPECIAL(LOOP)1766 DECL_SPECIAL(LOOP)
1767 {
1768     struct ureg_program *ureg = tx->ureg;
1769     unsigned *label;
1770     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1771     struct ureg_dst ctr;
1772     struct ureg_dst tmp;
1773     struct ureg_src ctrx;
1774 
1775     label = tx_bgnloop(tx);
1776     ctr = tx_get_loopctr(tx, TRUE);
1777     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1778 
1779     /* src: num_iterations - start_value of al - step for al - 0 */
1780     ureg_MOV(ureg, ctr, src);
1781     ureg_BGNLOOP(tx->ureg, label);
1782     tmp = tx_scratch_scalar(tx);
1783     /* Initially ctr.x contains the number of iterations.
1784      * ctr.y will contain the updated value of al.
1785      * We decrease ctr.x at the end of every iteration,
1786      * and stop when it reaches 0. */
1787 
1788     if (!tx->native_integers) {
1789         /* case src and ctr contain floats */
1790         /* to avoid precision issue, we stop when ctr <= 0.5 */
1791         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1792         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1793     } else {
1794         /* case src and ctr contain integers */
1795         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1796         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1797     }
1798     ureg_BRK(ureg);
1799     tx_endcond(tx);
1800     ureg_ENDIF(ureg);
1801     return D3D_OK;
1802 }
1803 
DECL_SPECIAL(RET)1804 DECL_SPECIAL(RET)
1805 {
1806     /* RET as a last instruction could be safely ignored.
1807      * Remove it to prevent crashes/warnings in case underlying
1808      * driver doesn't implement arbitrary returns.
1809      */
1810     if (*(tx->parse_next) != NINED3DSP_END) {
1811         ureg_RET(tx->ureg);
1812     }
1813     return D3D_OK;
1814 }
1815 
DECL_SPECIAL(ENDLOOP)1816 DECL_SPECIAL(ENDLOOP)
1817 {
1818     struct ureg_program *ureg = tx->ureg;
1819     struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1820     struct ureg_dst dst_ctrx, dst_al;
1821     struct ureg_src src_ctr, al_counter;
1822 
1823     dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1824     dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1825     src_ctr = ureg_src(ctr);
1826     al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1827 
1828     /* ctr.x -= 1
1829      * ctr.y (aL) += step */
1830     if (!tx->native_integers) {
1831         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1832         ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1833     } else {
1834         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1835         ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1836     }
1837     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1838     return D3D_OK;
1839 }
1840 
DECL_SPECIAL(LABEL)1841 DECL_SPECIAL(LABEL)
1842 {
1843     unsigned k = tx->num_inst_labels;
1844     unsigned n = tx->insn.src[0].idx;
1845     assert(n < 2048);
1846     if (n >= k)
1847        tx->inst_labels = REALLOC(tx->inst_labels,
1848                                  k * sizeof(tx->inst_labels[0]),
1849                                  n * sizeof(tx->inst_labels[0]));
1850 
1851     tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1852     return D3D_OK;
1853 }
1854 
DECL_SPECIAL(SINCOS)1855 DECL_SPECIAL(SINCOS)
1856 {
1857     struct ureg_program *ureg = tx->ureg;
1858     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1859     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1860     struct ureg_dst tmp = tx_scratch_scalar(tx);
1861 
1862     assert(!(dst.WriteMask & 0xc));
1863 
1864     /* Copying to a temporary register avoids src/dst aliasing.
1865      * src is supposed to have replicated swizzle. */
1866     ureg_MOV(ureg, tmp, src);
1867 
1868     /* z undefined, w untouched */
1869     ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1870              tx_src_scalar(tmp));
1871     ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1872              tx_src_scalar(tmp));
1873     return D3D_OK;
1874 }
1875 
DECL_SPECIAL(SGN)1876 DECL_SPECIAL(SGN)
1877 {
1878     ureg_SSG(tx->ureg,
1879              tx_dst_param(tx, &tx->insn.dst[0]),
1880              tx_src_param(tx, &tx->insn.src[0]));
1881     return D3D_OK;
1882 }
1883 
DECL_SPECIAL(REP)1884 DECL_SPECIAL(REP)
1885 {
1886     struct ureg_program *ureg = tx->ureg;
1887     unsigned *label;
1888     struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1889     struct ureg_dst ctr;
1890     struct ureg_dst tmp;
1891     struct ureg_src ctrx;
1892 
1893     label = tx_bgnloop(tx);
1894     ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1895     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1896 
1897     /* NOTE: rep must be constant, so we don't have to save the count */
1898     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1899 
1900     /* rep: num_iterations - 0 - 0 - 0 */
1901     ureg_MOV(ureg, ctr, rep);
1902     ureg_BGNLOOP(ureg, label);
1903     tmp = tx_scratch_scalar(tx);
1904     /* Initially ctr.x contains the number of iterations.
1905      * We decrease ctr.x at the end of every iteration,
1906      * and stop when it reaches 0. */
1907 
1908     if (!tx->native_integers) {
1909         /* case src and ctr contain floats */
1910         /* to avoid precision issue, we stop when ctr <= 0.5 */
1911         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1912         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1913     } else {
1914         /* case src and ctr contain integers */
1915         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1916         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1917     }
1918     ureg_BRK(ureg);
1919     tx_endcond(tx);
1920     ureg_ENDIF(ureg);
1921 
1922     return D3D_OK;
1923 }
1924 
DECL_SPECIAL(ENDREP)1925 DECL_SPECIAL(ENDREP)
1926 {
1927     struct ureg_program *ureg = tx->ureg;
1928     struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1929     struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1930     struct ureg_src src_ctr = ureg_src(ctr);
1931 
1932     /* ctr.x -= 1 */
1933     if (!tx->native_integers)
1934         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1935     else
1936         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1937 
1938     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1939     return D3D_OK;
1940 }
1941 
DECL_SPECIAL(ENDIF)1942 DECL_SPECIAL(ENDIF)
1943 {
1944     tx_endcond(tx);
1945     ureg_ENDIF(tx->ureg);
1946     return D3D_OK;
1947 }
1948 
DECL_SPECIAL(IF)1949 DECL_SPECIAL(IF)
1950 {
1951     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1952 
1953     if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1954         ureg_UIF(tx->ureg, src, tx_cond(tx));
1955     else
1956         ureg_IF(tx->ureg, src, tx_cond(tx));
1957 
1958     return D3D_OK;
1959 }
1960 
1961 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)1962 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1963 {
1964     switch (flags) {
1965     case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1966     case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1967     case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1968     case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1969     case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1970     case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1971     default:
1972         assert(!"invalid comparison flags");
1973         return TGSI_OPCODE_SGT;
1974     }
1975 }
1976 
DECL_SPECIAL(IFC)1977 DECL_SPECIAL(IFC)
1978 {
1979     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1980     struct ureg_src src[2];
1981     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1982     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1983     src[1] = tx_src_param(tx, &tx->insn.src[1]);
1984     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1985     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1986     return D3D_OK;
1987 }
1988 
DECL_SPECIAL(ELSE)1989 DECL_SPECIAL(ELSE)
1990 {
1991     ureg_ELSE(tx->ureg, tx_elsecond(tx));
1992     return D3D_OK;
1993 }
1994 
DECL_SPECIAL(BREAKC)1995 DECL_SPECIAL(BREAKC)
1996 {
1997     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1998     struct ureg_src src[2];
1999     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2000     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2001     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2002     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2003     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2004     ureg_BRK(tx->ureg);
2005     tx_endcond(tx);
2006     ureg_ENDIF(tx->ureg);
2007     return D3D_OK;
2008 }
2009 
2010 static const char *sm1_declusage_names[] =
2011 {
2012     [D3DDECLUSAGE_POSITION] = "POSITION",
2013     [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2014     [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2015     [D3DDECLUSAGE_NORMAL] = "NORMAL",
2016     [D3DDECLUSAGE_PSIZE] = "PSIZE",
2017     [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2018     [D3DDECLUSAGE_TANGENT] = "TANGENT",
2019     [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2020     [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2021     [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2022     [D3DDECLUSAGE_COLOR] = "COLOR",
2023     [D3DDECLUSAGE_FOG] = "FOG",
2024     [D3DDECLUSAGE_DEPTH] = "DEPTH",
2025     [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2026 };
2027 
2028 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)2029 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2030 {
2031     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2032 }
2033 
2034 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,boolean tc,struct sm1_semantic * dcl)2035 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2036                       boolean tc,
2037                       struct sm1_semantic *dcl)
2038 {
2039     BYTE index = dcl->usage_idx;
2040 
2041     /* For everything that is not matching to a TGSI_SEMANTIC_****,
2042      * we match to a TGSI_SEMANTIC_GENERIC with index.
2043      *
2044      * The index can be anything UINT16 and usage_idx is BYTE,
2045      * so we can fit everything. It doesn't matter if indices
2046      * are close together or low.
2047      *
2048      *
2049      * POSITION >= 1: 10 * index + 7
2050      * COLOR >= 2: 10 * (index-1) + 8
2051      * FOG: 16
2052      * TEXCOORD[0..15]: index
2053      * BLENDWEIGHT: 10 * index + 19
2054      * BLENDINDICES: 10 * index + 20
2055      * NORMAL: 10 * index + 21
2056      * TANGENT: 10 * index + 22
2057      * BINORMAL: 10 * index + 23
2058      * TESSFACTOR: 10 * index + 24
2059      */
2060 
2061     switch (dcl->usage) {
2062     case D3DDECLUSAGE_POSITION:
2063     case D3DDECLUSAGE_POSITIONT:
2064     case D3DDECLUSAGE_DEPTH:
2065         if (index == 0) {
2066             sem->Name = TGSI_SEMANTIC_POSITION;
2067             sem->Index = 0;
2068         } else {
2069             sem->Name = TGSI_SEMANTIC_GENERIC;
2070             sem->Index = 10 * index + 7;
2071         }
2072         break;
2073     case D3DDECLUSAGE_COLOR:
2074         if (index < 2) {
2075             sem->Name = TGSI_SEMANTIC_COLOR;
2076             sem->Index = index;
2077         } else {
2078             sem->Name = TGSI_SEMANTIC_GENERIC;
2079             sem->Index = 10 * (index-1) + 8;
2080         }
2081         break;
2082     case D3DDECLUSAGE_FOG:
2083         assert(index == 0);
2084         sem->Name = TGSI_SEMANTIC_GENERIC;
2085         sem->Index = 16;
2086         break;
2087     case D3DDECLUSAGE_PSIZE:
2088         assert(index == 0);
2089         sem->Name = TGSI_SEMANTIC_PSIZE;
2090         sem->Index = 0;
2091         break;
2092     case D3DDECLUSAGE_TEXCOORD:
2093         assert(index < 16);
2094         if (index < 8 && tc)
2095             sem->Name = TGSI_SEMANTIC_TEXCOORD;
2096         else
2097             sem->Name = TGSI_SEMANTIC_GENERIC;
2098         sem->Index = index;
2099         break;
2100     case D3DDECLUSAGE_BLENDWEIGHT:
2101         sem->Name = TGSI_SEMANTIC_GENERIC;
2102         sem->Index = 10 * index + 19;
2103         break;
2104     case D3DDECLUSAGE_BLENDINDICES:
2105         sem->Name = TGSI_SEMANTIC_GENERIC;
2106         sem->Index = 10 * index + 20;
2107         break;
2108     case D3DDECLUSAGE_NORMAL:
2109         sem->Name = TGSI_SEMANTIC_GENERIC;
2110         sem->Index = 10 * index + 21;
2111         break;
2112     case D3DDECLUSAGE_TANGENT:
2113         sem->Name = TGSI_SEMANTIC_GENERIC;
2114         sem->Index = 10 * index + 22;
2115         break;
2116     case D3DDECLUSAGE_BINORMAL:
2117         sem->Name = TGSI_SEMANTIC_GENERIC;
2118         sem->Index = 10 * index + 23;
2119         break;
2120     case D3DDECLUSAGE_TESSFACTOR:
2121         sem->Name = TGSI_SEMANTIC_GENERIC;
2122         sem->Index = 10 * index + 24;
2123         break;
2124     case D3DDECLUSAGE_SAMPLE:
2125         sem->Name = TGSI_SEMANTIC_COUNT;
2126         sem->Index = 0;
2127         break;
2128     default:
2129         unreachable("Invalid DECLUSAGE.");
2130         break;
2131     }
2132 }
2133 
2134 #define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2135 #define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2136 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2137 #define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2138 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2139 d3dstt_to_tgsi_tex(BYTE sampler_type)
2140 {
2141     switch (sampler_type) {
2142     case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
2143     case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
2144     case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2145     case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
2146     default:
2147         assert(0);
2148         return TGSI_TEXTURE_UNKNOWN;
2149     }
2150 }
2151 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2152 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2153 {
2154     switch (sampler_type) {
2155     case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2156     case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2157     case NINED3DSTT_VOLUME:
2158     case NINED3DSTT_CUBE:
2159     default:
2160         assert(0);
2161         return TGSI_TEXTURE_UNKNOWN;
2162     }
2163 }
2164 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2165 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2166 {
2167     boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
2168     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2169     case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2170     case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2171     case 3: return TGSI_TEXTURE_3D;
2172     default:
2173         return TGSI_TEXTURE_CUBE;
2174     }
2175 }
2176 
2177 static const char *
sm1_sampler_type_name(BYTE sampler_type)2178 sm1_sampler_type_name(BYTE sampler_type)
2179 {
2180     switch (sampler_type) {
2181     case NINED3DSTT_1D:     return "1D";
2182     case NINED3DSTT_2D:     return "2D";
2183     case NINED3DSTT_VOLUME: return "VOLUME";
2184     case NINED3DSTT_CUBE:   return "CUBE";
2185     default:
2186         return "(D3DSTT_?)";
2187     }
2188 }
2189 
2190 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2191 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2192 {
2193     switch (sem->Name) {
2194     case TGSI_SEMANTIC_POSITION:
2195     case TGSI_SEMANTIC_NORMAL:
2196         return TGSI_INTERPOLATE_LINEAR;
2197     case TGSI_SEMANTIC_BCOLOR:
2198     case TGSI_SEMANTIC_COLOR:
2199         return TGSI_INTERPOLATE_COLOR;
2200     case TGSI_SEMANTIC_FOG:
2201     case TGSI_SEMANTIC_GENERIC:
2202     case TGSI_SEMANTIC_TEXCOORD:
2203     case TGSI_SEMANTIC_CLIPDIST:
2204     case TGSI_SEMANTIC_CLIPVERTEX:
2205         return TGSI_INTERPOLATE_PERSPECTIVE;
2206     case TGSI_SEMANTIC_EDGEFLAG:
2207     case TGSI_SEMANTIC_FACE:
2208     case TGSI_SEMANTIC_INSTANCEID:
2209     case TGSI_SEMANTIC_PCOORD:
2210     case TGSI_SEMANTIC_PRIMID:
2211     case TGSI_SEMANTIC_PSIZE:
2212     case TGSI_SEMANTIC_VERTEXID:
2213         return TGSI_INTERPOLATE_CONSTANT;
2214     default:
2215         assert(0);
2216         return TGSI_INTERPOLATE_CONSTANT;
2217     }
2218 }
2219 
/* DCL: process a register declaration read with sm1_read_semantic().
 *
 * - Samplers are declared in ureg and their texture target is stored in
 *   tx->sampler_targets.
 * - Vertex shader inputs are declared and their usage is stored in
 *   tx->info->input_map.
 * - Vertex shader outputs (version >= 3) are declared with the TGSI
 *   semantic computed by sm1_declusage_to_tgsi().
 * - Pixel shader inputs (version >= 3) are declared as fragment shader
 *   inputs with an interpolation mode taken from the TGSI semantic.
 *
 * Returns D3DERR_INVALIDCALL for the pixel shader input usages rejected
 * below, D3D_OK otherwise.
 */
DECL_SPECIAL(DCL)
{
    struct ureg_program *ureg = tx->ureg;
    boolean is_input;
    boolean is_sampler;
    struct tgsi_declaration_semantic tgsi;
    struct sm1_semantic sem;
    sm1_read_semantic(tx, &sem);

    is_input = sem.reg.file == D3DSPR_INPUT;
    is_sampler =
        sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;

    /* Debug dump of the declaration. */
    DUMP("DCL ");
    sm1_dump_dst_param(&sem.reg);
    if (is_sampler)
        DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
    else
    if (tx->version.major >= 3)
        /* NOTE(review): sem.usage indexes sm1_declusage_names without a
         * range check here - confirm sm1_read_semantic() bounds it. */
        DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
    else
    if (sem.usage | sem.usage_idx)
        DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
    else
        DUMP("\n");

    if (is_sampler) {
        /* NOTE(review): assumes sem.reg.idx is a valid sampler index
         * (small enough for the shift and for sampler_targets) - confirm. */
        const unsigned m = 1 << sem.reg.idx;
        ureg_DECL_sampler(ureg, sem.reg.idx);
        tx->info->sampler_mask |= m;
        /* Shadow samplers (per info->sampler_mask_shadow) get the shadow
         * variant of the texture target. */
        tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
            d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
            d3dstt_to_tgsi_tex(sem.sampler_type);
        return D3D_OK;
    }

    sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
    if (IS_VS) {
        if (is_input) {
            /* linkage outside of shader with vertex declaration */
            ureg_DECL_vs_input(ureg, sem.reg.idx);
            assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
            tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
            tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
            /* NOTE: preserving order in case of indirect access */
        } else
        if (tx->version.major >= 3) {
            /* Version >= 3 only; SM2 output semantic is determined by the
             * register file */
            assert(sem.reg.mask != 0);
            if (sem.usage == D3DDECLUSAGE_POSITIONT)
                tx->info->position_t = TRUE;
            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
            assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
            tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
                ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
            nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
            /* With process_vertices, POSITION0 is redirected: the real
             * output is kept in oPos_out and the shader writes a temporary
             * (oPos) instead. */
            if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
                tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
                tx->regs.oPos = tx->regs.o[sem.reg.idx];
            }

            /* Point size is likewise written to a temporary (oPts)
             * rather than directly to the declared output. */
            if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
                tx->regs.oPts = tx->regs.o[sem.reg.idx];
            }
        }
    } else {
        if (is_input && tx->version.major >= 3) {
            unsigned interp_location = 0;
            /* SM3 only, SM2 input semantic determined by file */
            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
            assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
            /* PositionT and tessfactor forbidden */
            if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
                return D3DERR_INVALIDCALL;

            if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
                /* Position0 is forbidden (likely because vPos already does that) */
                if (sem.usage == D3DDECLUSAGE_POSITION)
                    return D3DERR_INVALIDCALL;
                /* Following code is for depth */
                tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
                return D3D_OK;
            }

            /* Centroid sampling when the declaration asks for it, or for
             * colors when force_color_in_centroid is set. */
            if (sem.reg.mod & NINED3DSPDM_CENTROID ||
                (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
                interp_location = TGSI_INTERPOLATE_LOC_CENTROID;

            tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
                ureg, tgsi.Name, tgsi.Index,
                nine_tgsi_to_interp_mode(&tgsi),
                0, /* cylwrap */
                interp_location, 0, 1);
        } else
        /* Disabled by the constant 0: this branch never runs. */
        if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
            /* FragColor or FragDepth */
            assert(sem.reg.mask != 0);
            ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
                                    0, 1);
        }
    }
    return D3D_OK;
}
2325 
/* DEF: hand the float immediate carried in src[0] to tx_set_lconstf(),
 * keyed by the destination register index (presumably registering it as a
 * shader-local float constant - see tx_set_lconstf). */
DECL_SPECIAL(DEF)
{
    tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
    return D3D_OK;
}
2331 
/* DEFB: hand the boolean immediate carried in src[0] to tx_set_lconstb(),
 * keyed by the destination register index (presumably registering it as a
 * shader-local boolean constant - see tx_set_lconstb). */
DECL_SPECIAL(DEFB)
{
    tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
    return D3D_OK;
}
2337 
/* DEFI: hand the integer immediate carried in src[0] to tx_set_lconsti(),
 * keyed by the destination register index (presumably registering it as a
 * shader-local integer constant - see tx_set_lconsti). */
DECL_SPECIAL(DEFI)
{
    tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
    return D3D_OK;
}
2343 
DECL_SPECIAL(POW)2344 DECL_SPECIAL(POW)
2345 {
2346     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2347     struct ureg_src src[2] = {
2348         tx_src_param(tx, &tx->insn.src[0]),
2349         tx_src_param(tx, &tx->insn.src[1])
2350     };
2351     ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2352     return D3D_OK;
2353 }
2354 
/* Test results on Windows 10:
2356  * NV (NVIDIA GeForce GT 635M)
2357  * AMD (AMD Radeon HD 7730M)
2358  * INTEL (Intel(R) HD Graphics 4000)
2359  * PS2 and PS3:
2360  * RCP and RSQ can generate inf on NV and AMD.
2361  * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2362  * NV: log not clamped
2363  * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2364  * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2365  * All devices have 0*anything = 0
2366  *
2367  * INTEL VS2 and VS3: same behaviour.
2368  * Some differences VS2 and VS3 for constants defined with inf/NaN.
2369  * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
2370  * VS2 seems to clamp to zero (may be test failure).
2371  * AMD VS2: unknown, VS3: very likely behaviour of PS3
2372  * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
2374  * "Very likely" because the test was less extensive.
2375  *
2376  * Thus all clamping can be removed for shaders 2 and 3,
2377  * as long as 0*anything = 0.
 * Otherwise, clamp so that 0*anything = 0 still holds (anything then
 * being neither inf nor NaN, as the user is unlikely to pass them
 * as constants).
2381  * The status for VS1 and PS1 is unknown.
2382  */
2383 
DECL_SPECIAL(RCP)2384 DECL_SPECIAL(RCP)
2385 {
2386     struct ureg_program *ureg = tx->ureg;
2387     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2388     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2389     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2390     ureg_RCP(ureg, tmp, src);
2391     if (!tx->mul_zero_wins) {
2392         /* FLT_MAX has issues with Rayman */
2393         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2394         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2395     }
2396     return D3D_OK;
2397 }
2398 
DECL_SPECIAL(RSQ)2399 DECL_SPECIAL(RSQ)
2400 {
2401     struct ureg_program *ureg = tx->ureg;
2402     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2403     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2404     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2405     ureg_RSQ(ureg, tmp, ureg_abs(src));
2406     if (!tx->mul_zero_wins)
2407         ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2408     return D3D_OK;
2409 }
2410 
DECL_SPECIAL(LOG)2411 DECL_SPECIAL(LOG)
2412 {
2413     struct ureg_program *ureg = tx->ureg;
2414     struct ureg_dst tmp = tx_scratch_scalar(tx);
2415     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2416     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2417     ureg_LG2(ureg, tmp, ureg_abs(src));
2418     if (tx->mul_zero_wins) {
2419         ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2420     } else {
2421         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2422     }
2423     return D3D_OK;
2424 }
2425 
DECL_SPECIAL(LIT)2426 DECL_SPECIAL(LIT)
2427 {
2428     struct ureg_program *ureg = tx->ureg;
2429     struct ureg_dst tmp = tx_scratch(tx);
2430     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2431     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2432     ureg_LIT(ureg, tmp, src);
2433     /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2434      * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2435      * it 0^0 if src.w=0, which value is driver dependent. */
2436     ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2437              ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2438              ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2439     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2440     return D3D_OK;
2441 }
2442 
DECL_SPECIAL(NRM)2443 DECL_SPECIAL(NRM)
2444 {
2445     struct ureg_program *ureg = tx->ureg;
2446     struct ureg_dst tmp = tx_scratch_scalar(tx);
2447     struct ureg_src nrm = tx_src_scalar(tmp);
2448     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2449     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2450     ureg_DP3(ureg, tmp, src, src);
2451     ureg_RSQ(ureg, tmp, nrm);
2452     if (!tx->mul_zero_wins)
2453         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2454     ureg_MUL(ureg, dst, src, nrm);
2455     return D3D_OK;
2456 }
2457 
DECL_SPECIAL(DP2ADD)2458 DECL_SPECIAL(DP2ADD)
2459 {
2460     struct ureg_dst tmp = tx_scratch_scalar(tx);
2461     struct ureg_src dp2 = tx_src_scalar(tmp);
2462     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2463     struct ureg_src src[3];
2464     int i;
2465     for (i = 0; i < 3; ++i)
2466         src[i] = tx_src_param(tx, &tx->insn.src[i]);
2467     assert_replicate_swizzle(&src[2]);
2468 
2469     ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2470     ureg_ADD(tx->ureg, dst, src[2], dp2);
2471 
2472     return D3D_OK;
2473 }
2474 
DECL_SPECIAL(TEXCOORD)2475 DECL_SPECIAL(TEXCOORD)
2476 {
2477     struct ureg_program *ureg = tx->ureg;
2478     const unsigned s = tx->insn.dst[0].idx;
2479     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2480 
2481     tx_texcoord_alloc(tx, s);
2482     ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2483     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2484 
2485     return D3D_OK;
2486 }
2487 
DECL_SPECIAL(TEXCOORD_ps14)2488 DECL_SPECIAL(TEXCOORD_ps14)
2489 {
2490     struct ureg_program *ureg = tx->ureg;
2491     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2492     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2493 
2494     assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2495 
2496     ureg_MOV(ureg, dst, src);
2497 
2498     return D3D_OK;
2499 }
2500 
DECL_SPECIAL(TEXKILL)2501 DECL_SPECIAL(TEXKILL)
2502 {
2503     struct ureg_src reg;
2504 
2505     if (tx->version.major > 1 || tx->version.minor > 3) {
2506         reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2507     } else {
2508         tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2509         reg = tx->regs.vT[tx->insn.dst[0].idx];
2510     }
2511     if (tx->version.major < 2)
2512         reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2513     ureg_KILL_IF(tx->ureg, reg);
2514 
2515     return D3D_OK;
2516 }
2517 
DECL_SPECIAL(TEXBEM)2518 DECL_SPECIAL(TEXBEM)
2519 {
2520     struct ureg_program *ureg = tx->ureg;
2521     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2522     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2523     struct ureg_dst tmp, tmp2, texcoord;
2524     struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2525     struct ureg_src bumpenvlscale, bumpenvloffset;
2526     const int m = tx->insn.dst[0].idx;
2527 
2528     assert(tx->version.major == 1);
2529 
2530     sample = ureg_DECL_sampler(ureg, m);
2531     tx->info->sampler_mask |= 1 << m;
2532 
2533     tx_texcoord_alloc(tx, m);
2534 
2535     tmp = tx_scratch(tx);
2536     tmp2 = tx_scratch(tx);
2537     texcoord = tx_scratch(tx);
2538     /*
2539      * Bump-env-matrix:
2540      * 00 is X
2541      * 01 is Y
2542      * 10 is Z
2543      * 11 is W
2544      */
2545     c8m = nine_float_constant_src(tx, 8+m);
2546     c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2547 
2548     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2549     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2550     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2551     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2552 
2553     /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2554     if (m % 2 == 0) {
2555         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2556         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2557     } else {
2558         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2559         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2560     }
2561 
2562     apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2563 
2564     /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
2565     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2566              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2567     /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2568     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2569              NINE_APPLY_SWIZZLE(src, Y),
2570              NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2571 
2572     /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2573     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2574              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2575     /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2576     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2577              NINE_APPLY_SWIZZLE(src, Y),
2578              NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2579 
2580     /* Now the texture coordinates are in tmp.xy */
2581 
2582     if (tx->insn.opcode == D3DSIO_TEXBEM) {
2583         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2584     } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2585         /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2586         ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2587         ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2588                  bumpenvlscale, bumpenvloffset);
2589         ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2590     }
2591 
2592     tx->info->bumpenvmat_needed = 1;
2593 
2594     return D3D_OK;
2595 }
2596 
DECL_SPECIAL(TEXREG2AR)2597 DECL_SPECIAL(TEXREG2AR)
2598 {
2599     struct ureg_program *ureg = tx->ureg;
2600     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2601     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2602     struct ureg_src sample;
2603     const int m = tx->insn.dst[0].idx;
2604     ASSERTED const int n = tx->insn.src[0].idx;
2605     assert(m >= 0 && m > n);
2606 
2607     sample = ureg_DECL_sampler(ureg, m);
2608     tx->info->sampler_mask |= 1 << m;
2609     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2610 
2611     return D3D_OK;
2612 }
2613 
DECL_SPECIAL(TEXREG2GB)2614 DECL_SPECIAL(TEXREG2GB)
2615 {
2616     struct ureg_program *ureg = tx->ureg;
2617     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2618     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2619     struct ureg_src sample;
2620     const int m = tx->insn.dst[0].idx;
2621     ASSERTED const int n = tx->insn.src[0].idx;
2622     assert(m >= 0 && m > n);
2623 
2624     sample = ureg_DECL_sampler(ureg, m);
2625     tx->info->sampler_mask |= 1 << m;
2626     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2627 
2628     return D3D_OK;
2629 }
2630 
DECL_SPECIAL(TEXM3x2PAD)2631 DECL_SPECIAL(TEXM3x2PAD)
2632 {
2633     return D3D_OK; /* this is just padding */
2634 }
2635 
DECL_SPECIAL(TEXM3x2TEX)2636 DECL_SPECIAL(TEXM3x2TEX)
2637 {
2638     struct ureg_program *ureg = tx->ureg;
2639     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2640     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2641     struct ureg_src sample;
2642     const int m = tx->insn.dst[0].idx - 1;
2643     ASSERTED const int n = tx->insn.src[0].idx;
2644     assert(m >= 0 && m > n);
2645 
2646     tx_texcoord_alloc(tx, m);
2647     tx_texcoord_alloc(tx, m+1);
2648 
2649     /* performs the matrix multiplication */
2650     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2651     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2652 
2653     sample = ureg_DECL_sampler(ureg, m + 1);
2654     tx->info->sampler_mask |= 1 << (m + 1);
2655     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2656 
2657     return D3D_OK;
2658 }
2659 
DECL_SPECIAL(TEXM3x3PAD)2660 DECL_SPECIAL(TEXM3x3PAD)
2661 {
2662     return D3D_OK; /* this is just padding */
2663 }
2664 
DECL_SPECIAL(TEXM3x3SPEC)2665 DECL_SPECIAL(TEXM3x3SPEC)
2666 {
2667     struct ureg_program *ureg = tx->ureg;
2668     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2669     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2670     struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2671     struct ureg_src sample;
2672     struct ureg_dst tmp;
2673     const int m = tx->insn.dst[0].idx - 2;
2674     ASSERTED const int n = tx->insn.src[0].idx;
2675     assert(m >= 0 && m > n);
2676 
2677     tx_texcoord_alloc(tx, m);
2678     tx_texcoord_alloc(tx, m+1);
2679     tx_texcoord_alloc(tx, m+2);
2680 
2681     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2682     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2683     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2684 
2685     sample = ureg_DECL_sampler(ureg, m + 2);
2686     tx->info->sampler_mask |= 1 << (m + 2);
2687     tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2688 
2689     /* At this step, dst = N = (u', w', z').
2690      * We want dst to be the texture sampled at (u'', w'', z''), with
2691      * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2692     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2693     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2694     /* at this step tmp.x = 1/N.N */
2695     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2696     /* at this step tmp.y = N.E */
2697     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2698     /* at this step tmp.x = N.E/N.N */
2699     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2700     ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2701     /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2702     ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2703     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2704 
2705     return D3D_OK;
2706 }
2707 
DECL_SPECIAL(TEXREG2RGB)2708 DECL_SPECIAL(TEXREG2RGB)
2709 {
2710     struct ureg_program *ureg = tx->ureg;
2711     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2712     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2713     struct ureg_src sample;
2714     const int m = tx->insn.dst[0].idx;
2715     ASSERTED const int n = tx->insn.src[0].idx;
2716     assert(m >= 0 && m > n);
2717 
2718     sample = ureg_DECL_sampler(ureg, m);
2719     tx->info->sampler_mask |= 1 << m;
2720     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2721 
2722     return D3D_OK;
2723 }
2724 
DECL_SPECIAL(TEXDP3TEX)2725 DECL_SPECIAL(TEXDP3TEX)
2726 {
2727     struct ureg_program *ureg = tx->ureg;
2728     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2729     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2730     struct ureg_dst tmp;
2731     struct ureg_src sample;
2732     const int m = tx->insn.dst[0].idx;
2733     ASSERTED const int n = tx->insn.src[0].idx;
2734     assert(m >= 0 && m > n);
2735 
2736     tx_texcoord_alloc(tx, m);
2737 
2738     tmp = tx_scratch(tx);
2739     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2740     ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2741 
2742     sample = ureg_DECL_sampler(ureg, m);
2743     tx->info->sampler_mask |= 1 << m;
2744     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2745 
2746     return D3D_OK;
2747 }
2748 
DECL_SPECIAL(TEXM3x2DEPTH)2749 DECL_SPECIAL(TEXM3x2DEPTH)
2750 {
2751     struct ureg_program *ureg = tx->ureg;
2752     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2753     struct ureg_dst tmp;
2754     const int m = tx->insn.dst[0].idx - 1;
2755     ASSERTED const int n = tx->insn.src[0].idx;
2756     assert(m >= 0 && m > n);
2757 
2758     tx_texcoord_alloc(tx, m);
2759     tx_texcoord_alloc(tx, m+1);
2760 
2761     tmp = tx_scratch(tx);
2762 
2763     /* performs the matrix multiplication */
2764     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2765     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2766 
2767     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2768     /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2769     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2770     /* res = 'w' == 0 ? 1.0 : z/w */
2771     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2772              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2773     /* replace the depth for depth testing with the result */
2774     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2775                                               TGSI_WRITEMASK_Z, 0, 1);
2776     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2777     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2778     return D3D_OK;
2779 }
2780 
DECL_SPECIAL(TEXDP3)2781 DECL_SPECIAL(TEXDP3)
2782 {
2783     struct ureg_program *ureg = tx->ureg;
2784     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2785     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2786     const int m = tx->insn.dst[0].idx;
2787     ASSERTED const int n = tx->insn.src[0].idx;
2788     assert(m >= 0 && m > n);
2789 
2790     tx_texcoord_alloc(tx, m);
2791 
2792     ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2793 
2794     return D3D_OK;
2795 }
2796 
DECL_SPECIAL(TEXM3x3)2797 DECL_SPECIAL(TEXM3x3)
2798 {
2799     struct ureg_program *ureg = tx->ureg;
2800     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2801     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2802     struct ureg_src sample;
2803     struct ureg_dst E, tmp;
2804     const int m = tx->insn.dst[0].idx - 2;
2805     ASSERTED const int n = tx->insn.src[0].idx;
2806     assert(m >= 0 && m > n);
2807 
2808     tx_texcoord_alloc(tx, m);
2809     tx_texcoord_alloc(tx, m+1);
2810     tx_texcoord_alloc(tx, m+2);
2811 
2812     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2813     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2814     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2815 
2816     switch (tx->insn.opcode) {
2817     case D3DSIO_TEXM3x3:
2818         ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2819         break;
2820     case D3DSIO_TEXM3x3TEX:
2821         sample = ureg_DECL_sampler(ureg, m + 2);
2822         tx->info->sampler_mask |= 1 << (m + 2);
2823         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2824         break;
2825     case D3DSIO_TEXM3x3VSPEC:
2826         sample = ureg_DECL_sampler(ureg, m + 2);
2827         tx->info->sampler_mask |= 1 << (m + 2);
2828         E = tx_scratch(tx);
2829         tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2830         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2831         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2832         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2833         /* At this step, dst = N = (u', w', z').
2834          * We want dst to be the texture sampled at (u'', w'', z''), with
2835          * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2836         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2837         ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2838         /* at this step tmp.x = 1/N.N */
2839         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2840         /* at this step tmp.y = N.E */
2841         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2842         /* at this step tmp.x = N.E/N.N */
2843         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2844         ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2845         /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2846         ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2847         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2848         break;
2849     default:
2850         return D3DERR_INVALIDCALL;
2851     }
2852     return D3D_OK;
2853 }
2854 
DECL_SPECIAL(TEXDEPTH)2855 DECL_SPECIAL(TEXDEPTH)
2856 {
2857     struct ureg_program *ureg = tx->ureg;
2858     struct ureg_dst r5;
2859     struct ureg_src r5r, r5g;
2860 
2861     assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2862 
2863     /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2864      * r5 won't be used afterward, thus we can use r5.ba */
2865     r5 = tx->regs.r[5];
2866     r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2867     r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2868 
2869     ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2870     ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2871     /* r5.r = r/g */
2872     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2873              r5r, ureg_imm1f(ureg, 1.0f));
2874     /* replace the depth for depth testing with the result */
2875     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2876                                               TGSI_WRITEMASK_Z, 0, 1);
2877     ureg_MOV(ureg, tx->regs.oDepth, r5r);
2878 
2879     return D3D_OK;
2880 }
2881 
DECL_SPECIAL(BEM)2882 DECL_SPECIAL(BEM)
2883 {
2884     struct ureg_program *ureg = tx->ureg;
2885     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2886     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2887     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2888     struct ureg_src m00, m01, m10, m11, c8m;
2889     const int m = tx->insn.dst[0].idx;
2890     struct ureg_dst tmp = tx_scratch(tx);
2891     /*
2892      * Bump-env-matrix:
2893      * 00 is X
2894      * 01 is Y
2895      * 10 is Z
2896      * 11 is W
2897      */
2898     c8m = nine_float_constant_src(tx, 8+m);
2899     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2900     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2901     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2902     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2903     /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
2904     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2905              NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2906     /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2907     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2908              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2909 
2910     /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2911     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2912              NINE_APPLY_SWIZZLE(src1, X), src0);
2913     /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2914     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2915              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2916     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2917 
2918     tx->info->bumpenvmat_needed = 1;
2919 
2920     return D3D_OK;
2921 }
2922 
DECL_SPECIAL(TEXLD)2923 DECL_SPECIAL(TEXLD)
2924 {
2925     struct ureg_program *ureg = tx->ureg;
2926     unsigned target;
2927     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2928     struct ureg_src src[2] = {
2929         tx_src_param(tx, &tx->insn.src[0]),
2930         tx_src_param(tx, &tx->insn.src[1])
2931     };
2932     assert(tx->insn.src[1].idx >= 0 &&
2933            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2934     target = tx->sampler_targets[tx->insn.src[1].idx];
2935 
2936     switch (tx->insn.flags) {
2937     case 0:
2938         ureg_TEX(ureg, dst, target, src[0], src[1]);
2939         break;
2940     case NINED3DSI_TEXLD_PROJECT:
2941         ureg_TXP(ureg, dst, target, src[0], src[1]);
2942         break;
2943     case NINED3DSI_TEXLD_BIAS:
2944         ureg_TXB(ureg, dst, target, src[0], src[1]);
2945         break;
2946     default:
2947         assert(0);
2948         return D3DERR_INVALIDCALL;
2949     }
2950     return D3D_OK;
2951 }
2952 
DECL_SPECIAL(TEXLD_14)2953 DECL_SPECIAL(TEXLD_14)
2954 {
2955     struct ureg_program *ureg = tx->ureg;
2956     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2957     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2958     const unsigned s = tx->insn.dst[0].idx;
2959     const unsigned t = ps1x_sampler_type(tx->info, s);
2960 
2961     tx->info->sampler_mask |= 1 << s;
2962     ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2963 
2964     return D3D_OK;
2965 }
2966 
DECL_SPECIAL(TEX)2967 DECL_SPECIAL(TEX)
2968 {
2969     struct ureg_program *ureg = tx->ureg;
2970     const unsigned s = tx->insn.dst[0].idx;
2971     const unsigned t = ps1x_sampler_type(tx->info, s);
2972     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2973     struct ureg_src src[2];
2974 
2975     tx_texcoord_alloc(tx, s);
2976 
2977     src[0] = tx->regs.vT[s];
2978     src[1] = ureg_DECL_sampler(ureg, s);
2979     tx->info->sampler_mask |= 1 << s;
2980 
2981     TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2982 
2983     return D3D_OK;
2984 }
2985 
DECL_SPECIAL(TEXLDD)2986 DECL_SPECIAL(TEXLDD)
2987 {
2988     unsigned target;
2989     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2990     struct ureg_src src[4] = {
2991         tx_src_param(tx, &tx->insn.src[0]),
2992         tx_src_param(tx, &tx->insn.src[1]),
2993         tx_src_param(tx, &tx->insn.src[2]),
2994         tx_src_param(tx, &tx->insn.src[3])
2995     };
2996     assert(tx->insn.src[1].idx >= 0 &&
2997            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2998     target = tx->sampler_targets[tx->insn.src[1].idx];
2999 
3000     ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3001     return D3D_OK;
3002 }
3003 
DECL_SPECIAL(TEXLDL)3004 DECL_SPECIAL(TEXLDL)
3005 {
3006     unsigned target;
3007     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3008     struct ureg_src src[2] = {
3009        tx_src_param(tx, &tx->insn.src[0]),
3010        tx_src_param(tx, &tx->insn.src[1])
3011     };
3012     assert(tx->insn.src[1].idx >= 0 &&
3013            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3014     target = tx->sampler_targets[tx->insn.src[1].idx];
3015 
3016     ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3017     return D3D_OK;
3018 }
3019 
DECL_SPECIAL(SETP)3020 DECL_SPECIAL(SETP)
3021 {
3022     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3023     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3024     struct ureg_src src[2] = {
3025        tx_src_param(tx, &tx->insn.src[0]),
3026        tx_src_param(tx, &tx->insn.src[1])
3027     };
3028     ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3029     return D3D_OK;
3030 }
3031 
DECL_SPECIAL(BREAKP)3032 DECL_SPECIAL(BREAKP)
3033 {
3034     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3035     ureg_IF(tx->ureg, src, tx_cond(tx));
3036     ureg_BRK(tx->ureg);
3037     tx_endcond(tx);
3038     ureg_ENDIF(tx->ureg);
3039     return D3D_OK;
3040 }
3041 
DECL_SPECIAL(PHASE)3042 DECL_SPECIAL(PHASE)
3043 {
3044     return D3D_OK; /* we don't care about phase */
3045 }
3046 
DECL_SPECIAL(COMMENT)3047 DECL_SPECIAL(COMMENT)
3048 {
3049     return D3D_OK; /* nothing to do */
3050 }
3051 
3052 
/* Builds one positional initializer for a struct sm1_op_info:
 *   o        - D3D opcode name, pasted onto D3DSIO_
 *   t        - TGSI opcode name, pasted onto TGSI_OPCODE_
 *   vv1, vv2 - first version pair (presumably the vertex shader min/max
 *              versions - the struct layout is not visible here, confirm)
 *   pv1, pv2 - second version pair (presumably pixel shader min/max)
 *   d, s     - presumably the destination and source operand counts
 *   h        - SPECIAL(...) handler, or NULL
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
3055 
/* Opcode description table, one _OPI entry per (opcode, version range).
 *
 * create_op_info_map() walks this table in order and records, for each D3DSIO
 * opcode, the index of an entry whose version range contains the shader
 * version. An opcode may appear several times with different ranges (e.g.
 * SINCOS, TEXCOORD, TEX, EXPP); if more than one of its entries matches, the
 * later one overrides the earlier one, so entry order is significant.
 */
static const struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, NOP,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
3162 
/* Stand-alone descriptor for the PHASE instruction, kept outside inst_table;
 * it has no operands and uses the PHASE handler. */
static const struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3165 
/* Stand-alone descriptor for the COMMENT pseudo-instruction, kept outside
 * inst_table; it has no operands and uses the COMMENT handler. */
static const struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3168 
3169 static void
create_op_info_map(struct shader_translator * tx)3170 create_op_info_map(struct shader_translator *tx)
3171 {
3172     const unsigned version = (tx->version.major << 8) | tx->version.minor;
3173     unsigned i;
3174 
3175     for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3176         tx->op_info_map[i] = -1;
3177 
3178     if (tx->processor == PIPE_SHADER_VERTEX) {
3179         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3180             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3181             if (inst_table[i].vert_version.min <= version &&
3182                 inst_table[i].vert_version.max >= version)
3183                 tx->op_info_map[inst_table[i].sio] = i;
3184         }
3185     } else {
3186         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3187             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3188             if (inst_table[i].frag_version.min <= version &&
3189                 inst_table[i].frag_version.max >= version)
3190                 tx->op_info_map[inst_table[i].sio] = i;
3191         }
3192     }
3193 }
3194 
3195 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3196 NineTranslateInstruction_Generic(struct shader_translator *tx)
3197 {
3198     struct ureg_dst dst[1];
3199     struct ureg_src src[4];
3200     unsigned i;
3201 
3202     for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3203         dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3204     for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3205         src[i] = tx_src_param(tx, &tx->insn.src[i]);
3206 
3207     ureg_insn(tx->ureg, tx->insn.info->opcode,
3208               dst, tx->insn.ndst,
3209               src, tx->insn.nsrc, 0);
3210     return D3D_OK;
3211 }
3212 
3213 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3214 TOKEN_PEEK(struct shader_translator *tx)
3215 {
3216     return *(tx->parse);
3217 }
3218 
3219 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3220 TOKEN_NEXT(struct shader_translator *tx)
3221 {
3222     return *(tx->parse)++;
3223 }
3224 
3225 static inline void
TOKEN_JUMP(struct shader_translator * tx)3226 TOKEN_JUMP(struct shader_translator *tx)
3227 {
3228     if (tx->parse_next && tx->parse != tx->parse_next) {
3229         WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3230         tx->parse = tx->parse_next;
3231     }
3232 }
3233 
3234 static inline boolean
sm1_parse_eof(struct shader_translator * tx)3235 sm1_parse_eof(struct shader_translator *tx)
3236 {
3237     return TOKEN_PEEK(tx) == NINED3DSP_END;
3238 }
3239 
3240 static void
sm1_read_version(struct shader_translator * tx)3241 sm1_read_version(struct shader_translator *tx)
3242 {
3243     const DWORD tok = TOKEN_NEXT(tx);
3244 
3245     tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3246     tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3247 
3248     switch (tok >> 16) {
3249     case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3250     case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3251     default:
3252        DBG("Invalid shader type: %x\n", tok);
3253        tx->processor = ~0;
3254        break;
3255     }
3256 }
3257 
3258 /* This is just to check if we parsed the instruction properly. */
3259 static void
sm1_parse_get_skip(struct shader_translator * tx)3260 sm1_parse_get_skip(struct shader_translator *tx)
3261 {
3262     const DWORD tok = TOKEN_PEEK(tx);
3263 
3264     if (tx->version.major >= 2) {
3265         tx->parse_next = tx->parse + 1 /* this */ +
3266             ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3267     } else {
3268         tx->parse_next = NULL; /* TODO: determine from param count */
3269     }
3270 }
3271 
3272 static void
sm1_print_comment(const char * comment,UINT size)3273 sm1_print_comment(const char *comment, UINT size)
3274 {
3275     if (!size)
3276         return;
3277     /* TODO */
3278 }
3279 
3280 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3281 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3282 {
3283     DWORD tok = TOKEN_PEEK(tx);
3284 
3285     while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3286     {
3287         const char *comment = "";
3288         UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3289         tx->parse += size + 1;
3290 
3291         if (print)
3292             sm1_print_comment(comment, size);
3293 
3294         tok = TOKEN_PEEK(tx);
3295     }
3296 }
3297 
3298 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3299 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3300 {
3301     *reg = TOKEN_NEXT(tx);
3302 
3303     if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3304     {
3305         if (tx->version.major < 2)
3306             *rel = (1 << 31) |
3307                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3308                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
3309                 D3DSP_NOSWIZZLE;
3310         else
3311             *rel = TOKEN_NEXT(tx);
3312     }
3313 }
3314 
3315 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3316 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3317 {
3318     int8_t shift;
3319     dst->file =
3320         (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
3321         (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3322     dst->type = TGSI_RETURN_TYPE_FLOAT;
3323     dst->idx = tok & D3DSP_REGNUM_MASK;
3324     dst->rel = NULL;
3325     dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3326     dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3327     shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3328     dst->shift = (shift & 0x7) - (shift & 0x8);
3329 }
3330 
3331 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3332 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3333 {
3334     src->file =
3335         ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
3336         ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3337     src->type = TGSI_RETURN_TYPE_FLOAT;
3338     src->idx = tok & D3DSP_REGNUM_MASK;
3339     src->rel = NULL;
3340     src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3341     src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3342 
3343     switch (src->file) {
3344     case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3345     case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3346     case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3347     default:
3348         break;
3349     }
3350 }
3351 
3352 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3353 sm1_parse_immediate(struct shader_translator *tx,
3354                     struct sm1_src_param *imm)
3355 {
3356     imm->file = NINED3DSPR_IMMEDIATE;
3357     imm->idx = INT_MIN;
3358     imm->rel = NULL;
3359     imm->swizzle = NINED3DSP_NOSWIZZLE;
3360     imm->mod = 0;
3361     switch (tx->insn.opcode) {
3362     case D3DSIO_DEF:
3363         imm->type = NINED3DSPTYPE_FLOAT4;
3364         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3365         tx->parse += 4;
3366         break;
3367     case D3DSIO_DEFI:
3368         imm->type = NINED3DSPTYPE_INT4;
3369         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3370         tx->parse += 4;
3371         break;
3372     case D3DSIO_DEFB:
3373         imm->type = NINED3DSPTYPE_BOOL;
3374         memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3375         tx->parse += 1;
3376         break;
3377     default:
3378        assert(0);
3379        break;
3380     }
3381 }
3382 
3383 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3384 sm1_read_dst_param(struct shader_translator *tx,
3385                    struct sm1_dst_param *dst,
3386                    struct sm1_src_param *rel)
3387 {
3388     DWORD tok_dst, tok_rel = 0;
3389 
3390     sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3391     sm1_parse_dst_param(dst, tok_dst);
3392     if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3393         sm1_parse_src_param(rel, tok_rel);
3394         dst->rel = rel;
3395     }
3396 }
3397 
3398 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3399 sm1_read_src_param(struct shader_translator *tx,
3400                    struct sm1_src_param *src,
3401                    struct sm1_src_param *rel)
3402 {
3403     DWORD tok_src, tok_rel = 0;
3404 
3405     sm1_parse_get_param(tx, &tok_src, &tok_rel);
3406     sm1_parse_src_param(src, tok_src);
3407     if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3408         assert(rel);
3409         sm1_parse_src_param(rel, tok_rel);
3410         src->rel = rel;
3411     }
3412 }
3413 
3414 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3415 sm1_read_semantic(struct shader_translator *tx,
3416                   struct sm1_semantic *sem)
3417 {
3418     const DWORD tok_usg = TOKEN_NEXT(tx);
3419     const DWORD tok_dst = TOKEN_NEXT(tx);
3420 
3421     sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3422     sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3423     sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3424 
3425     sm1_parse_dst_param(&sem->reg, tok_dst);
3426 }
3427 
/* Parse and translate a single instruction at the current parse position.
 *
 * Steps, in order: skip leading comments, decode the opcode token, look up
 * the opcode description, check it against the shader version, read the
 * destination / predicate / source parameters (plus the literal payload of
 * DEF/DEFI/DEFB), dump and check the instruction, run its handler, apply the
 * dst0 modifiers and finish predication.
 *
 * A handler result other than D3D_OK sets tx->failure. The function itself
 * returns nothing.
 */
static void
sm1_parse_instruction(struct shader_translator *tx)
{
    struct sm1_instruction *insn = &tx->insn;
    HRESULT hr;
    DWORD tok;
    const struct sm1_op_info *info = NULL;
    unsigned i;

    sm1_parse_comments(tx, TRUE);
    sm1_parse_get_skip(tx);

    tok = TOKEN_NEXT(tx);

    /* Split the opcode token into its fields. */
    insn->opcode = tok & D3DSI_OPCODE_MASK;
    insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
    insn->coissue = !!(tok & D3DSI_COISSUE);
    insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);

    /* Opcodes covered by op_info_map index into inst_table (negative entries
     * mean "no description"); PHASE and COMMENT lie outside the map and have
     * dedicated descriptors. */
    if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
        int k = tx->op_info_map[insn->opcode];
        if (k >= 0) {
            assert(k < ARRAY_SIZE(inst_table));
            info = &inst_table[k];
        }
    } else {
       if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
       if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
    }
    if (!info) {
       DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
       TOKEN_JUMP(tx);
       return;
    }
    insn->info = info;
    insn->ndst = info->ndst;
    insn->nsrc = info->nsrc;

    /* check version */
    {
        unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
        unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
        unsigned ver = (tx->version.major << 8) | tx->version.minor;
        if (ver < min || ver > max) {
            DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
                min, ver, max);
            /* NOTE(review): unlike the unknown-opcode path above, this return
             * neither calls TOKEN_JUMP() nor sets tx->failure, and the
             * operands have not been consumed yet - confirm this is
             * intended. */
            return;
        }
    }

    /* Operand order in the stream: destinations, predicate, sources. */
    for (i = 0; i < insn->ndst; ++i)
        sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
    if (insn->predicated)
        /* No storage is passed for a relative address here;
         * sm1_read_src_param() asserts if the predicate token requests one. */
        sm1_read_src_param(tx, &insn->pred, NULL);
    for (i = 0; i < insn->nsrc; ++i)
        sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);

    /* parse here so we can dump them before processing */
    if (insn->opcode == D3DSIO_DEF ||
        insn->opcode == D3DSIO_DEFI ||
        insn->opcode == D3DSIO_DEFB)
        sm1_parse_immediate(tx, &tx->insn.src[0]);

    sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
    sm1_instruction_check(insn);

    /* Flag predication for the duration of the handler and lazily declare
     * the two temporaries it uses. */
    if (insn->predicated) {
        tx->predicated_activated = true;
        if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
            tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
            tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
        }
    }

    /* Opcodes without a dedicated handler use the generic translation. */
    if (info->handler)
        hr = info->handler(tx);
    else
        hr = NineTranslateInstruction_Generic(tx);
    tx_apply_dst0_modifiers(tx);

    if (insn->predicated) {
        tx->predicated_activated = false;
        /* TODO: predicate might be allowed on outputs,
         * which cannot be src. Workaround it. */
        /* Select between predicate_tmp and the previous predicate_dst
         * contents based on the (negated) predicate register. */
        ureg_CMP(tx->ureg, tx->regs.predicate_dst,
                 ureg_negate(tx_src_param(tx, &insn->pred)),
                 ureg_src(tx->regs.predicate_tmp),
                 ureg_src(tx->regs.predicate_dst));
    }

    if (hr != D3D_OK)
        tx->failure = TRUE;
    tx->num_scratch = 0; /* reset */

    TOKEN_JUMP(tx);
}
3524 
/* Shorthands for querying driver capabilities.
 * Both expand in place and rely on a variable named `screen` being in scope;
 * GET_SHADER_CAP additionally reads `info->type` from an in-scope `info`. */
#define GET_CAP(n) screen->get_param( \
      screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) screen->get_shader_param( \
      screen, info->type, PIPE_SHADER_CAP_##n)
3529 
/* Initialize a translator for one translation pass over info->byte_code.
 *
 * Zeroes *tx, resets the result fields of *info that a pass fills in, reads
 * the shader version, creates the ureg program, caches the driver
 * capabilities the translation depends on, sets the per-version constant
 * limits and emits the declarations that must come first.
 *
 * tx:     translator to initialize; all previous contents are discarded
 *         without being freed.
 * screen: screen whose capabilities are queried.
 * info:   shader description; also receives the translation results.
 *
 * Returns E_OUTOFMEMORY if the ureg program cannot be created, D3D_OK
 * otherwise.
 */
static HRESULT
tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
{
    unsigned i;

    memset(tx, 0, sizeof(*tx));

    tx->info = info;

    tx->byte_code = info->byte_code;
    tx->parse = info->byte_code;

    /* Reset everything in info that translation accumulates. */
    for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
        info->input_map[i] = NINE_DECLUSAGE_NONE;
    info->num_inputs = 0;

    info->position_t = FALSE;
    info->point_size = FALSE;

    memset(tx->slots_used, 0, sizeof(tx->slots_used));
    memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
    memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));

    tx->info->const_float_slots = 0;
    tx->info->const_int_slots = 0;
    tx->info->const_bool_slots = 0;

    info->sampler_mask = 0x0;
    info->rt_mask = 0x0;

    info->lconstf.data = NULL;
    info->lconstf.ranges = NULL;

    info->bumpenvmat_needed = 0;

    /* Mark the registers below as not declared yet. */
    for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
        tx->regs.rL[i] = ureg_dst_undef();
    }
    tx->regs.address = ureg_dst_undef();
    tx->regs.a0 = ureg_dst_undef();
    tx->regs.p = ureg_dst_undef();
    tx->regs.oDepth = ureg_dst_undef();
    tx->regs.vPos = ureg_src_undef();
    tx->regs.vFace = ureg_src_undef();
    for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
        tx->regs.o[i] = ureg_dst_undef();
    for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
        tx->regs.oCol[i] = ureg_dst_undef();
    for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
        tx->regs.vC[i] = ureg_src_undef();
    for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
        tx->regs.vT[i] = ureg_src_undef();

    /* Must precede every use of tx->version below. */
    sm1_read_version(tx);

    /* Packed as major in the high nibble, minor in the low nibble. */
    info->version = (tx->version.major << 4) | tx->version.minor;

    tx->num_outputs = 0;

    create_op_info_map(tx);

    tx->ureg = ureg_create(info->type);
    if (!tx->ureg) {
        return E_OUTOFMEMORY;
    }

    /* Cache the driver capabilities consulted during translation. */
    tx->native_integers = GET_SHADER_CAP(INTEGERS);
    tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
    tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
    tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
    tx->texcoord_sn = tx->want_texcoord ?
        TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
    tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
    tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);

    /* Number of float constants addressable per shader type and version. */
    if (IS_VS) {
        tx->num_constf_allowed = NINE_MAX_CONST_F;
    } else if (tx->version.major < 2) {/* IS_PS v1 */
        tx->num_constf_allowed = 8;
    } else if (tx->version.major == 2) {/* IS_PS v2 */
        tx->num_constf_allowed = 32;
    } else {/* IS_PS v3 */
        tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
    }

    /* Integer and boolean constants are only allowed from version 2 on. */
    if (tx->version.major < 2) {
        tx->num_consti_allowed = 0;
        tx->num_constb_allowed = 0;
    } else {
        tx->num_consti_allowed = NINE_MAX_CONST_I;
        tx->num_constb_allowed = NINE_MAX_CONST_B;
    }

    /* With info->swvp_on set, version >= 2 shaders get larger limits. */
    if (info->swvp_on && tx->version.major >= 2) {
        tx->num_constf_allowed = 8192;
        tx->num_consti_allowed = 2048;
        tx->num_constb_allowed = 2048;
    }

    /* VS must always write position. Declare it here to make it the 1st output.
     * (Some drivers like nv50 are buggy and rely on that.)
     */
    if (IS_VS) {
        tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
    } else {
        ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
        if (!tx->shift_wpos)
            ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
    }

    tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
    if (tx->mul_zero_wins)
       ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);

    /* Add additional definition of constants */
    if (info->add_constants_defs.c_combination) {
        /* NOTE: this declaration shadows the function-scope `i`. */
        unsigned i;

        assert(info->add_constants_defs.int_const_added);
        assert(info->add_constants_defs.bool_const_added);
        /* We only add constants that are used by the shader
         * and that are not defined in the shader */
        for (i = 0; i < NINE_MAX_CONST_I; ++i) {
            if ((*info->add_constants_defs.int_const_added)[i]) {
                DBG("Defining const i%i : { %i %i %i %i }\n", i,
                    info->add_constants_defs.c_combination->const_i[i][0],
                    info->add_constants_defs.c_combination->const_i[i][1],
                    info->add_constants_defs.c_combination->const_i[i][2],
                    info->add_constants_defs.c_combination->const_i[i][3]);
                tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
            }
        }
        for (i = 0; i < NINE_MAX_CONST_B; ++i) {
            if ((*info->add_constants_defs.bool_const_added)[i]) {
                DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
                tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
            }
        }
    }
    return D3D_OK;
}
3671 
3672 static void
tx_dtor(struct shader_translator * tx)3673 tx_dtor(struct shader_translator *tx)
3674 {
3675     if (tx->slot_map)
3676         FREE(tx->slot_map);
3677     if (tx->num_inst_labels)
3678         FREE(tx->inst_labels);
3679     FREE(tx->lconstf);
3680     FREE(tx->regs.r);
3681     FREE(tx);
3682 }
3683 
3684 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3685  * CONST[1].xyz = x+width/2, y+height/2, zmin */
3686 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3687 shader_add_vs_viewport_transform(struct shader_translator *tx)
3688 {
3689     struct ureg_program *ureg = tx->ureg;
3690     struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3691     struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3692     /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3693 
3694     c0 = ureg_src_dimension(c0, 4);
3695     c1 = ureg_src_dimension(c1, 4);
3696     /* TODO: find out when we need to apply the viewport transformation or not.
3697      * Likely will be XYZ vs XYZRHW in vdecl_out
3698      * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3699      * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3700      */
3701     ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3702 }
3703 
3704 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_src src_col)3705 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3706 {
3707     struct ureg_program *ureg = tx->ureg;
3708     struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3709     struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3710     struct ureg_src fog_vs, fog_color;
3711     struct ureg_dst fog_factor, depth;
3712 
3713     if (!tx->info->fog_enable) {
3714         ureg_MOV(ureg, oCol0, src_col);
3715         return;
3716     }
3717 
3718     if (tx->info->fog_mode != D3DFOG_NONE) {
3719         depth = tx_scratch_scalar(tx);
3720         /* Depth used for fog is perspective interpolated */
3721         ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3722         ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3723     }
3724 
3725     fog_color = nine_float_constant_src(tx, 32);
3726     fog_params = nine_float_constant_src(tx, 33);
3727     fog_factor = tx_scratch_scalar(tx);
3728 
3729     if (tx->info->fog_mode == D3DFOG_LINEAR) {
3730         fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3731         fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3732         ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3733         ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3734     } else if (tx->info->fog_mode == D3DFOG_EXP) {
3735         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3736         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3737         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3738         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3739     } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3740         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3741         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3742         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3743         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3744         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3745     } else {
3746         fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3747                                             TGSI_INTERPOLATE_PERSPECTIVE),
3748                                             TGSI_SWIZZLE_X);
3749         ureg_MOV(ureg, fog_factor, fog_vs);
3750     }
3751 
3752     ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3753              tx_src_scalar(fog_factor), src_col, fog_color);
3754     ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3755 }
3756 
parse_shader(struct shader_translator * tx)3757 static void parse_shader(struct shader_translator *tx)
3758 {
3759     struct nine_shader_info *info = tx->info;
3760 
3761     while (!sm1_parse_eof(tx) && !tx->failure)
3762         sm1_parse_instruction(tx);
3763     tx->parse++; /* for byte_size */
3764 
3765     if (tx->failure)
3766         return;
3767 
3768     if (IS_PS && tx->version.major < 3) {
3769         if (tx->version.major < 2) {
3770             assert(tx->num_temp); /* there must be color output */
3771             info->rt_mask |= 0x1;
3772             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3773         } else {
3774             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3775         }
3776     }
3777 
3778     if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3779         tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3780         ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3781     }
3782 
3783     if (info->position_t)
3784         ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3785 
3786     if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3787         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3788         ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3789         ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3790         info->point_size = TRUE;
3791     }
3792 
3793     if (info->process_vertices)
3794         shader_add_vs_viewport_transform(tx);
3795 
3796     ureg_END(tx->ureg);
3797 }
3798 
/* Bit flags accepted by the NINE_SHADER environment variable (see
 * nine_shader_get_debug_flag()). The *_NIR_* / *_NO_NIR_* pairs override the
 * driver's preferred IR per shader stage; the DUMP flags print the shaders. */
#define NINE_SHADER_DEBUG_OPTION_NIR_VS           (1 << 0)
#define NINE_SHADER_DEBUG_OPTION_NIR_PS           (1 << 1)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS        (1 << 2)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS        (1 << 3)
#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR         (1 << 4)
#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI        (1 << 5)

/* Name / flag / description table used to parse the NINE_SHADER variable. */
static const struct debug_named_value nine_shader_debug_options[] = {
    { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
    { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
    { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
    { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
    { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
    { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
    DEBUG_NAMED_VALUE_END /* must be last */
};
3815 
3816 static inline boolean
nine_shader_get_debug_flag(uint64_t flag)3817 nine_shader_get_debug_flag(uint64_t flag)
3818 {
3819     static uint64_t flags = 0;
3820     static boolean first_run = TRUE;
3821 
3822     if (unlikely(first_run)) {
3823         first_run = FALSE;
3824         flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3825 
3826         // Check old TGSI dump envvar too
3827         if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3828             flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3829         }
3830     }
3831 
3832     return !!(flags & flag);
3833 }
3834 
3835 static void
nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state * state,const struct tgsi_token * tgsi_tokens,struct pipe_screen * screen)3836 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3837                                      struct pipe_screen *screen)
3838 {
3839     struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, true);
3840 
3841     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3842         nir_print_shader(nir, stdout);
3843     }
3844 
3845     state->type = PIPE_SHADER_IR_NIR;
3846     state->tokens = NULL;
3847     state->ir.nir = nir;
3848     memset(&state->stream_output, 0, sizeof(state->stream_output));
3849 }
3850 
3851 static void *
nine_ureg_create_shader(struct ureg_program * ureg,struct pipe_context * pipe,const struct pipe_stream_output_info * so)3852 nine_ureg_create_shader(struct ureg_program                  *ureg,
3853                         struct pipe_context                  *pipe,
3854                         const struct pipe_stream_output_info   *so)
3855 {
3856     struct pipe_shader_state state;
3857     const struct tgsi_token *tgsi_tokens;
3858     struct pipe_screen *screen = pipe->screen;
3859 
3860     tgsi_tokens = ureg_finalize(ureg);
3861     if (!tgsi_tokens)
3862         return NULL;
3863 
3864     assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3865     enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
3866 
3867     int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
3868     bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
3869     bool use_nir = prefer_nir ||
3870         ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
3871         ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
3872 
3873     /* Allow user to override preferred IR, this is very useful for debugging */
3874     if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
3875         use_nir = false;
3876     if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
3877         use_nir = false;
3878 
3879     DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
3880          shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
3881          prefer_nir ? "NIR" : "TGSI",
3882          use_nir ? "NIR" : "TGSI");
3883 
3884     if (use_nir) {
3885         nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
3886     } else {
3887         pipe_shader_state_from_tgsi(&state, tgsi_tokens);
3888     }
3889 
3890     assert(state.tokens || state.ir.nir);
3891 
3892     if (so)
3893         state.stream_output = *so;
3894 
3895     switch (shader_type) {
3896     case PIPE_SHADER_VERTEX:
3897         return pipe->create_vs_state(pipe, &state);
3898     case PIPE_SHADER_FRAGMENT:
3899         return pipe->create_fs_state(pipe, &state);
3900     default:
3901         unreachable("unsupported shader type");
3902     }
3903 }
3904 
3905 
/* Create a driver shader from the ureg program and destroy the program.
 *
 * p:    program to compile; it is destroyed whether or not creation succeeds.
 * pipe: context used to create the shader.
 * so:   optional stream output description, may be NULL.
 *
 * Returns the value produced by nine_ureg_create_shader().
 */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program                   *p,
                                       struct pipe_context                *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *cso = nine_ureg_create_shader(p, pipe, so);

    ureg_destroy(p);

    return cso;
}
3915 
3916 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)3917 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3918 {
3919     struct shader_translator *tx;
3920     HRESULT hr = D3D_OK;
3921     const unsigned processor = info->type;
3922     struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3923     unsigned *const_ranges = NULL;
3924 
3925     user_assert(processor != ~0, D3DERR_INVALIDCALL);
3926 
3927     tx = MALLOC_STRUCT(shader_translator);
3928     if (!tx)
3929         return E_OUTOFMEMORY;
3930 
3931     if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
3932         hr = E_OUTOFMEMORY;
3933         goto out;
3934     }
3935 
3936     assert(IS_VS || !info->swvp_on);
3937 
3938     if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3939         hr = D3DERR_INVALIDCALL;
3940         DBG("Unsupported shader version: %u.%u !\n",
3941             tx->version.major, tx->version.minor);
3942         goto out;
3943     }
3944     if (tx->processor != processor) {
3945         hr = D3DERR_INVALIDCALL;
3946         DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3947         goto out;
3948     }
3949     DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3950          tx->version.major, tx->version.minor);
3951 
3952     parse_shader(tx);
3953 
3954     if (tx->failure) {
3955         /* For VS shaders, we print the warning later,
3956          * we first try with swvp. */
3957         if (IS_PS)
3958             ERR("Encountered buggy shader\n");
3959         ureg_destroy(tx->ureg);
3960         hr = D3DERR_INVALIDCALL;
3961         goto out;
3962     }
3963 
3964     /* Recompile after compacting constant slots if possible */
3965     if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
3966         unsigned *slot_map;
3967         unsigned c;
3968         int i, j, num_ranges, prev;
3969 
3970         DBG("Recompiling shader for constant compaction\n");
3971         ureg_destroy(tx->ureg);
3972 
3973         if (tx->num_inst_labels)
3974             FREE(tx->inst_labels);
3975         FREE(tx->lconstf);
3976         FREE(tx->regs.r);
3977 
3978         num_ranges = 0;
3979         prev = -2;
3980         for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
3981             if (tx->slots_used[i]) {
3982                 if (prev != i - 1)
3983                     num_ranges++;
3984                 prev = i;
3985             }
3986         }
3987         slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
3988         const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
3989         if (!slot_map || !const_ranges) {
3990             hr = E_OUTOFMEMORY;
3991             goto out;
3992         }
3993         c = 0;
3994         j = -1;
3995         prev = -2;
3996         for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
3997             if (tx->slots_used[i]) {
3998                 if (prev != i - 1)
3999                     j++;
4000                 /* Initialize first slot of the range */
4001                 if (!const_ranges[2*j+1])
4002                     const_ranges[2*j] = i;
4003                 const_ranges[2*j+1]++;
4004                 prev = i;
4005                 slot_map[i] = c++;
4006             }
4007         }
4008 
4009         if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4010             hr = E_OUTOFMEMORY;
4011             goto out;
4012         }
4013         tx->slot_map = slot_map;
4014         parse_shader(tx);
4015         assert(!tx->failure);
4016 #if !defined(NDEBUG)
4017         i = 0;
4018         j = 0;
4019         while (const_ranges[i*2+1] != 0) {
4020             j += const_ranges[i*2+1];
4021             i++;
4022         }
4023         assert(j == tx->num_slots);
4024 #endif
4025     }
4026 
4027     /* record local constants */
4028     if (tx->num_lconstf && tx->indirect_const_access) {
4029         struct nine_range *ranges;
4030         float *data;
4031         int *indices;
4032         unsigned i, k, n;
4033 
4034         hr = E_OUTOFMEMORY;
4035 
4036         data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4037         if (!data)
4038             goto out;
4039         info->lconstf.data = data;
4040 
4041         indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4042         if (!indices)
4043             goto out;
4044 
4045         /* lazy sort, num_lconstf should be small */
4046         for (n = 0; n < tx->num_lconstf; ++n) {
4047             for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4048                 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4049                     k = i;
4050             }
4051             indices[n] = tx->lconstf[k].idx;
4052             memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4053             tx->lconstf[k].idx = INT_MAX;
4054         }
4055 
4056         /* count ranges */
4057         for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4058             if (indices[i] != indices[i - 1] + 1)
4059                 ++n;
4060         ranges = MALLOC(n * sizeof(ranges[0]));
4061         if (!ranges) {
4062             FREE(indices);
4063             goto out;
4064         }
4065         info->lconstf.ranges = ranges;
4066 
4067         k = 0;
4068         ranges[k].bgn = indices[0];
4069         for (i = 1; i < tx->num_lconstf; ++i) {
4070             if (indices[i] != indices[i - 1] + 1) {
4071                 ranges[k].next = &ranges[k + 1];
4072                 ranges[k].end = indices[i - 1] + 1;
4073                 ++k;
4074                 ranges[k].bgn = indices[i];
4075             }
4076         }
4077         ranges[k].end = indices[i - 1] + 1;
4078         ranges[k].next = NULL;
4079         assert(n == (k + 1));
4080 
4081         FREE(indices);
4082         hr = D3D_OK;
4083     }
4084 
4085     /* r500 */
4086     if (info->const_float_slots > device->max_vs_const_f &&
4087         (info->const_int_slots || info->const_bool_slots) &&
4088         !info->swvp_on)
4089         ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4090 
4091 
4092     if (tx->indirect_const_access) { /* vs only */
4093         info->const_float_slots = device->max_vs_const_f;
4094         tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4095     }
4096 
4097     if (!info->swvp_on) {
4098         info->const_used_size = sizeof(float[4]) * tx->num_slots;
4099         if (tx->num_slots)
4100             ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4101     } else {
4102          ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4103          ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4104          ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4105          ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4106     }
4107 
4108     if (info->process_vertices)
4109         ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4110 
4111     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4112         const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4113         tgsi_dump(toks, 0);
4114         ureg_free_tokens(toks);
4115     }
4116 
4117     if (info->process_vertices) {
4118         NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4119                                                     tx->output_info,
4120                                                     tx->num_outputs,
4121                                                     &(info->so));
4122         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4123     } else
4124         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4125     if (!info->cso) {
4126         hr = D3DERR_DRIVERINTERNALERROR;
4127         FREE(info->lconstf.data);
4128         FREE(info->lconstf.ranges);
4129         goto out;
4130     }
4131 
4132     info->const_ranges = const_ranges;
4133     const_ranges = NULL;
4134     info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4135 out:
4136     if (const_ranges)
4137         FREE(const_ranges);
4138     tx_dtor(tx);
4139     return hr;
4140 }
4141