1 /**********************************************************
2  * Copyright 1998-2013 VMware, Inc.  All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **********************************************************/
25 
26 /**
27  * @file svga_tgsi_vgpu10.c
28  *
29  * TGSI -> VGPU10 shader translation.
30  *
31  * \author Mingcheng Chen
32  * \author Brian Paul
33  */
34 
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_two_side.h"
44 #include "tgsi/tgsi_aa_point.h"
45 #include "tgsi/tgsi_util.h"
46 #include "util/u_math.h"
47 #include "util/u_memory.h"
48 #include "util/u_bitmask.h"
49 #include "util/u_debug.h"
50 #include "util/u_pstipple.h"
51 
52 #include "svga_context.h"
53 #include "svga_debug.h"
54 #include "svga_link.h"
55 #include "svga_shader.h"
56 #include "svga_tgsi.h"
57 
58 #include "VGPU10ShaderTokens.h"
59 
60 
/** Sentinel stored in register-index fields that have no register assigned */
#define INVALID_INDEX 99999

/** Limit on internal temporaries (presumably bounds internal_temp_count;
 *  TODO confirm against the allocation code) */
#define MAX_INTERNAL_TEMPS 3

/** Capacity of the system_value_indexes[] table */
#define MAX_SYSTEM_VALUES 4

/** Capacity of the immediates[] table, in 4-component entries */
#define MAX_IMMEDIATE_COUNT (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT / 4)

/** Capacity of the temp_arrays[] table.  Enough? */
#define MAX_TEMP_ARRAYS 64
67 
68 
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation.
 */
enum clipping_mode
{
   /** No clipping enabled */
   CLIP_NONE,

   /**
    * The shader has no clipping declarations or code but one or more
    * user-defined clip planes are enabled.  We generate extra code to
    * emit clip distances.
    */
   CLIP_LEGACY,

   /**
    * The shader already declares clip distance output registers and
    * has code to write to them.
    */
   CLIP_DISTANCE,

   /**
    * The shader declares a clip vertex output register and has code
    * that writes to the register.  We convert the clipvertex position
    * into one or more clip distances.
    */
   CLIP_VERTEX
};
88 
89 
90 struct svga_shader_emitter_v10
91 {
92    /* The token output buffer */
93    unsigned size;
94    char *buf;
95    char *ptr;
96 
97    /* Information about the shader and state (does not change) */
98    struct svga_compile_key key;
99    struct tgsi_shader_info info;
100    unsigned unit;
101 
102    unsigned inst_start_token;
103    boolean discard_instruction; /**< throw away current instruction? */
104 
105    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
106    unsigned num_immediates;      /**< Number of immediates emitted */
107    unsigned common_immediate_pos[8];  /**< literals for common immediates */
108    unsigned num_common_immediates;
109    boolean immediates_emitted;
110 
111    unsigned num_outputs;      /**< include any extra outputs */
112                               /**  The first extra output is reserved for
113                                *   non-adjusted vertex position for
114                                *   stream output purpose
115                                */
116 
117    /* Temporary Registers */
118    unsigned num_shader_temps; /**< num of temps used by original shader */
119    unsigned internal_temp_count;  /**< currently allocated internal temps */
120    struct {
121       unsigned start, size;
122    } temp_arrays[MAX_TEMP_ARRAYS];
123    unsigned num_temp_arrays;
124 
125    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
126    struct {
127       unsigned arrayId, index;
128    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
129 
130    /** Number of constants used by original shader for each constant buffer.
131     * The size should probably always match with that of svga_state.constbufs.
132     */
133    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
134 
135    /* Samplers */
136    unsigned num_samplers;
137    boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
138    ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
139    ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
140 
141    /* Address regs (really implemented with temps) */
142    unsigned num_address_regs;
143    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
144 
145    /* Output register usage masks */
146    ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
147 
148    /* To map TGSI system value index to VGPU shader input indexes */
149    ubyte system_value_indexes[MAX_SYSTEM_VALUES];
150 
151    struct {
152       /* vertex position scale/translation */
153       unsigned out_index;  /**< the real position output reg */
154       unsigned tmp_index;  /**< the fake/temp position output reg */
155       unsigned so_index;   /**< the non-adjusted position output reg */
156       unsigned prescale_scale_index, prescale_trans_index;
157       boolean  need_prescale;
158    } vposition;
159 
160    /* For vertex shaders only */
161    struct {
162       /* viewport constant */
163       unsigned viewport_index;
164 
165       /* temp index of adjusted vertex attributes */
166       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
167    } vs;
168 
169    /* For fragment shaders only */
170    struct {
171       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
172       unsigned num_color_outputs;
173       unsigned color_tmp_index;  /**< fake/temp color output reg */
174       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
175 
176       /* front-face */
177       unsigned face_input_index; /**< real fragment shader face reg (bool) */
178       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
179 
180       unsigned pstipple_sampler_unit;
181 
182       unsigned fragcoord_input_index;  /**< real fragment position input reg */
183       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
184 
185       /** Which texture units are doing shadow comparison in the FS code */
186       unsigned shadow_compare_units;
187    } fs;
188 
189    /* For geometry shaders only */
190    struct {
191       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
192       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
193       unsigned input_size;       /**< size of input arrays */
194       unsigned prim_id_index;    /**< primitive id register index */
195       unsigned max_out_vertices; /**< maximum number of output vertices */
196    } gs;
197 
198    /* For vertex or geometry shaders */
199    enum clipping_mode clip_mode;
200    unsigned clip_dist_out_index; /**< clip distance output register index */
201    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
202    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
203 
204    /** Index of temporary holding the clipvertex coordinate */
205    unsigned clip_vertex_out_index; /**< clip vertex output register index */
206    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
207 
208    /* user clip plane constant slot indexes */
209    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
210 
211    unsigned num_output_writes;
212    boolean constant_color_output;
213 
214    boolean uses_flat_interp;
215 
216    /* For all shaders: const reg index for RECT coord scaling */
217    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
218 
219    /* For all shaders: const reg index for texture buffer size */
220    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
221 
222    /* VS/GS/FS Linkage info */
223    struct shader_linkage linkage;
224 
225    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
226 };
227 
228 
229 static boolean
230 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
231 
232 static boolean
233 emit_vertex(struct svga_shader_emitter_v10 *emit,
234             const struct tgsi_full_instruction *inst);
235 
236 static char err_buf[128];
237 
238 static boolean
expand(struct svga_shader_emitter_v10 * emit)239 expand(struct svga_shader_emitter_v10 *emit)
240 {
241    char *new_buf;
242    unsigned newsize = emit->size * 2;
243 
244    if (emit->buf != err_buf)
245       new_buf = REALLOC(emit->buf, emit->size, newsize);
246    else
247       new_buf = NULL;
248 
249    if (!new_buf) {
250       emit->ptr = err_buf;
251       emit->buf = err_buf;
252       emit->size = sizeof(err_buf);
253       return FALSE;
254    }
255 
256    emit->size = newsize;
257    emit->ptr = new_buf + (emit->ptr - emit->buf);
258    emit->buf = new_buf;
259    return TRUE;
260 }
261 
262 /**
263  * Create and initialize a new svga_shader_emitter_v10 object.
264  */
265 static struct svga_shader_emitter_v10 *
alloc_emitter(void)266 alloc_emitter(void)
267 {
268    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
269 
270    if (!emit)
271       return NULL;
272 
273    /* to initialize the output buffer */
274    emit->size = 512;
275    if (!expand(emit)) {
276       FREE(emit);
277       return NULL;
278    }
279    return emit;
280 }
281 
282 /**
283  * Free an svga_shader_emitter_v10 object.
284  */
285 static void
free_emitter(struct svga_shader_emitter_v10 * emit)286 free_emitter(struct svga_shader_emitter_v10 *emit)
287 {
288    assert(emit);
289    FREE(emit->buf);    /* will be NULL if translation succeeded */
290    FREE(emit);
291 }
292 
293 static inline boolean
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)294 reserve(struct svga_shader_emitter_v10 *emit,
295         unsigned nr_dwords)
296 {
297    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
298       if (!expand(emit))
299          return FALSE;
300    }
301 
302    return TRUE;
303 }
304 
305 static boolean
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)306 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
307 {
308    if (!reserve(emit, 1))
309       return FALSE;
310 
311    *(uint32 *)emit->ptr = dword;
312    emit->ptr += sizeof dword;
313    return TRUE;
314 }
315 
316 static boolean
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)317 emit_dwords(struct svga_shader_emitter_v10 *emit,
318             const uint32 *dwords,
319             unsigned nr)
320 {
321    if (!reserve(emit, nr))
322       return FALSE;
323 
324    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
325    emit->ptr += nr * sizeof *dwords;
326    return TRUE;
327 }
328 
329 /** Return the number of tokens in the emitter's buffer */
330 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)331 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
332 {
333    return (emit->ptr - emit->buf) / sizeof(unsigned);
334 }
335 
336 
337 /**
338  * Check for register overflow.  If we overflow we'll set an
339  * error flag.  This function can be called for register declarations
340  * or use as src/dst instruction operands.
341  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
342                 or VGPU10_OPCODE_DCL_x
343  * \param index  the register index
344  */
345 static void
check_register_index(struct svga_shader_emitter_v10 * emit,unsigned operandType,unsigned index)346 check_register_index(struct svga_shader_emitter_v10 *emit,
347                      unsigned operandType, unsigned index)
348 {
349    bool overflow_before = emit->register_overflow;
350 
351    switch (operandType) {
352    case VGPU10_OPERAND_TYPE_TEMP:
353    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
354    case VGPU10_OPCODE_DCL_TEMPS:
355       if (index >= VGPU10_MAX_TEMPS) {
356          emit->register_overflow = TRUE;
357       }
358       break;
359    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
360    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
361       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
362          emit->register_overflow = TRUE;
363       }
364       break;
365    case VGPU10_OPERAND_TYPE_INPUT:
366    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
367    case VGPU10_OPCODE_DCL_INPUT:
368    case VGPU10_OPCODE_DCL_INPUT_SGV:
369    case VGPU10_OPCODE_DCL_INPUT_SIV:
370    case VGPU10_OPCODE_DCL_INPUT_PS:
371    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
372    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
373       if ((emit->unit == PIPE_SHADER_VERTEX &&
374            index >= VGPU10_MAX_VS_INPUTS) ||
375           (emit->unit == PIPE_SHADER_GEOMETRY &&
376            index >= VGPU10_MAX_GS_INPUTS) ||
377           (emit->unit == PIPE_SHADER_FRAGMENT &&
378            index >= VGPU10_MAX_FS_INPUTS)) {
379          emit->register_overflow = TRUE;
380       }
381       break;
382    case VGPU10_OPERAND_TYPE_OUTPUT:
383    case VGPU10_OPCODE_DCL_OUTPUT:
384    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
385    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
386       if ((emit->unit == PIPE_SHADER_VERTEX &&
387            index >= VGPU10_MAX_VS_OUTPUTS) ||
388           (emit->unit == PIPE_SHADER_GEOMETRY &&
389            index >= VGPU10_MAX_GS_OUTPUTS) ||
390           (emit->unit == PIPE_SHADER_FRAGMENT &&
391            index >= VGPU10_MAX_FS_OUTPUTS)) {
392          emit->register_overflow = TRUE;
393       }
394       break;
395    case VGPU10_OPERAND_TYPE_SAMPLER:
396    case VGPU10_OPCODE_DCL_SAMPLER:
397       if (index >= VGPU10_MAX_SAMPLERS) {
398          emit->register_overflow = TRUE;
399       }
400       break;
401    case VGPU10_OPERAND_TYPE_RESOURCE:
402    case VGPU10_OPCODE_DCL_RESOURCE:
403       if (index >= VGPU10_MAX_RESOURCES) {
404          emit->register_overflow = TRUE;
405       }
406       break;
407    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
408       if (index >= MAX_IMMEDIATE_COUNT) {
409          emit->register_overflow = TRUE;
410       }
411       break;
412    default:
413       assert(0);
414       ; /* nothing */
415    }
416 
417    if (emit->register_overflow && !overflow_before) {
418       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
419                    operandType, index);
420    }
421 }
422 
423 
424 /**
425  * Examine misc state to determine the clipping mode.
426  */
427 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)428 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
429 {
430    if (emit->info.num_written_clipdistance > 0) {
431       emit->clip_mode = CLIP_DISTANCE;
432    }
433    else if (emit->info.writes_clipvertex) {
434       emit->clip_mode = CLIP_VERTEX;
435    }
436    else if (emit->key.clip_plane_enable) {
437       emit->clip_mode = CLIP_LEGACY;
438    }
439    else {
440       emit->clip_mode = CLIP_NONE;
441    }
442 }
443 
444 
445 /**
446  * For clip distance register declarations and clip distance register
447  * writes we need to mask the declaration usage or instruction writemask
448  * (respectively) against the set of the really-enabled clipping planes.
449  *
450  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
451  * has a VS that writes to all 8 clip distance registers, but the plane enable
452  * flags are a subset of that.
453  *
454  * This function is used to apply the plane enable flags to the register
455  * declaration or instruction writemask.
456  *
457  * \param writemask  the declaration usage mask or instruction writemask
458  * \param clip_reg_index  which clip plane register is being declared/written.
459  *                        The legal values are 0 and 1 (two clip planes per
460  *                        register, for a total of 8 clip planes)
461  */
462 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)463 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
464                       unsigned writemask, unsigned clip_reg_index)
465 {
466    unsigned shift;
467 
468    assert(clip_reg_index < 2);
469 
470    /* four clip planes per clip register: */
471    shift = clip_reg_index * 4;
472    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
473 
474    return writemask;
475 }
476 
477 
478 /**
479  * Translate gallium shader type into VGPU10 type.
480  */
481 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)482 translate_shader_type(unsigned type)
483 {
484    switch (type) {
485    case PIPE_SHADER_VERTEX:
486       return VGPU10_VERTEX_SHADER;
487    case PIPE_SHADER_GEOMETRY:
488       return VGPU10_GEOMETRY_SHADER;
489    case PIPE_SHADER_FRAGMENT:
490       return VGPU10_PIXEL_SHADER;
491    default:
492       assert(!"Unexpected shader type");
493       return VGPU10_VERTEX_SHADER;
494    }
495 }
496 
497 
498 /**
499  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
500  * Note: we only need to translate the opcodes for "simple" instructions,
501  * as seen below.  All other opcodes are handled/translated specially.
502  */
503 static VGPU10_OPCODE_TYPE
translate_opcode(unsigned opcode)504 translate_opcode(unsigned opcode)
505 {
506    switch (opcode) {
507    case TGSI_OPCODE_MOV:
508       return VGPU10_OPCODE_MOV;
509    case TGSI_OPCODE_MUL:
510       return VGPU10_OPCODE_MUL;
511    case TGSI_OPCODE_ADD:
512       return VGPU10_OPCODE_ADD;
513    case TGSI_OPCODE_DP3:
514       return VGPU10_OPCODE_DP3;
515    case TGSI_OPCODE_DP4:
516       return VGPU10_OPCODE_DP4;
517    case TGSI_OPCODE_MIN:
518       return VGPU10_OPCODE_MIN;
519    case TGSI_OPCODE_MAX:
520       return VGPU10_OPCODE_MAX;
521    case TGSI_OPCODE_MAD:
522       return VGPU10_OPCODE_MAD;
523    case TGSI_OPCODE_SQRT:
524       return VGPU10_OPCODE_SQRT;
525    case TGSI_OPCODE_FRC:
526       return VGPU10_OPCODE_FRC;
527    case TGSI_OPCODE_FLR:
528       return VGPU10_OPCODE_ROUND_NI;
529    case TGSI_OPCODE_FSEQ:
530       return VGPU10_OPCODE_EQ;
531    case TGSI_OPCODE_FSGE:
532       return VGPU10_OPCODE_GE;
533    case TGSI_OPCODE_FSNE:
534       return VGPU10_OPCODE_NE;
535    case TGSI_OPCODE_DDX:
536       return VGPU10_OPCODE_DERIV_RTX;
537    case TGSI_OPCODE_DDY:
538       return VGPU10_OPCODE_DERIV_RTY;
539    case TGSI_OPCODE_RET:
540       return VGPU10_OPCODE_RET;
541    case TGSI_OPCODE_DIV:
542       return VGPU10_OPCODE_DIV;
543    case TGSI_OPCODE_IDIV:
544       return VGPU10_OPCODE_IDIV;
545    case TGSI_OPCODE_DP2:
546       return VGPU10_OPCODE_DP2;
547    case TGSI_OPCODE_BRK:
548       return VGPU10_OPCODE_BREAK;
549    case TGSI_OPCODE_IF:
550       return VGPU10_OPCODE_IF;
551    case TGSI_OPCODE_ELSE:
552       return VGPU10_OPCODE_ELSE;
553    case TGSI_OPCODE_ENDIF:
554       return VGPU10_OPCODE_ENDIF;
555    case TGSI_OPCODE_CEIL:
556       return VGPU10_OPCODE_ROUND_PI;
557    case TGSI_OPCODE_I2F:
558       return VGPU10_OPCODE_ITOF;
559    case TGSI_OPCODE_NOT:
560       return VGPU10_OPCODE_NOT;
561    case TGSI_OPCODE_TRUNC:
562       return VGPU10_OPCODE_ROUND_Z;
563    case TGSI_OPCODE_SHL:
564       return VGPU10_OPCODE_ISHL;
565    case TGSI_OPCODE_AND:
566       return VGPU10_OPCODE_AND;
567    case TGSI_OPCODE_OR:
568       return VGPU10_OPCODE_OR;
569    case TGSI_OPCODE_XOR:
570       return VGPU10_OPCODE_XOR;
571    case TGSI_OPCODE_CONT:
572       return VGPU10_OPCODE_CONTINUE;
573    case TGSI_OPCODE_EMIT:
574       return VGPU10_OPCODE_EMIT;
575    case TGSI_OPCODE_ENDPRIM:
576       return VGPU10_OPCODE_CUT;
577    case TGSI_OPCODE_BGNLOOP:
578       return VGPU10_OPCODE_LOOP;
579    case TGSI_OPCODE_ENDLOOP:
580       return VGPU10_OPCODE_ENDLOOP;
581    case TGSI_OPCODE_ENDSUB:
582       return VGPU10_OPCODE_RET;
583    case TGSI_OPCODE_NOP:
584       return VGPU10_OPCODE_NOP;
585    case TGSI_OPCODE_END:
586       return VGPU10_OPCODE_RET;
587    case TGSI_OPCODE_F2I:
588       return VGPU10_OPCODE_FTOI;
589    case TGSI_OPCODE_IMAX:
590       return VGPU10_OPCODE_IMAX;
591    case TGSI_OPCODE_IMIN:
592       return VGPU10_OPCODE_IMIN;
593    case TGSI_OPCODE_UDIV:
594    case TGSI_OPCODE_UMOD:
595    case TGSI_OPCODE_MOD:
596       return VGPU10_OPCODE_UDIV;
597    case TGSI_OPCODE_IMUL_HI:
598       return VGPU10_OPCODE_IMUL;
599    case TGSI_OPCODE_INEG:
600       return VGPU10_OPCODE_INEG;
601    case TGSI_OPCODE_ISHR:
602       return VGPU10_OPCODE_ISHR;
603    case TGSI_OPCODE_ISGE:
604       return VGPU10_OPCODE_IGE;
605    case TGSI_OPCODE_ISLT:
606       return VGPU10_OPCODE_ILT;
607    case TGSI_OPCODE_F2U:
608       return VGPU10_OPCODE_FTOU;
609    case TGSI_OPCODE_UADD:
610       return VGPU10_OPCODE_IADD;
611    case TGSI_OPCODE_U2F:
612       return VGPU10_OPCODE_UTOF;
613    case TGSI_OPCODE_UCMP:
614       return VGPU10_OPCODE_MOVC;
615    case TGSI_OPCODE_UMAD:
616       return VGPU10_OPCODE_UMAD;
617    case TGSI_OPCODE_UMAX:
618       return VGPU10_OPCODE_UMAX;
619    case TGSI_OPCODE_UMIN:
620       return VGPU10_OPCODE_UMIN;
621    case TGSI_OPCODE_UMUL:
622    case TGSI_OPCODE_UMUL_HI:
623       return VGPU10_OPCODE_UMUL;
624    case TGSI_OPCODE_USEQ:
625       return VGPU10_OPCODE_IEQ;
626    case TGSI_OPCODE_USGE:
627       return VGPU10_OPCODE_UGE;
628    case TGSI_OPCODE_USHR:
629       return VGPU10_OPCODE_USHR;
630    case TGSI_OPCODE_USLT:
631       return VGPU10_OPCODE_ULT;
632    case TGSI_OPCODE_USNE:
633       return VGPU10_OPCODE_INE;
634    case TGSI_OPCODE_SWITCH:
635       return VGPU10_OPCODE_SWITCH;
636    case TGSI_OPCODE_CASE:
637       return VGPU10_OPCODE_CASE;
638    case TGSI_OPCODE_DEFAULT:
639       return VGPU10_OPCODE_DEFAULT;
640    case TGSI_OPCODE_ENDSWITCH:
641       return VGPU10_OPCODE_ENDSWITCH;
642    case TGSI_OPCODE_FSLT:
643       return VGPU10_OPCODE_LT;
644    case TGSI_OPCODE_ROUND:
645       return VGPU10_OPCODE_ROUND_NE;
646    default:
647       assert(!"Unexpected TGSI opcode in translate_opcode()");
648       return VGPU10_OPCODE_NOP;
649    }
650 }
651 
652 
653 /**
654  * Translate a TGSI register file type into a VGPU10 operand type.
655  * \param array  is the TGSI_FILE_TEMPORARY register an array?
656  */
657 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,boolean array)658 translate_register_file(enum tgsi_file_type file, boolean array)
659 {
660    switch (file) {
661    case TGSI_FILE_CONSTANT:
662       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
663    case TGSI_FILE_INPUT:
664       return VGPU10_OPERAND_TYPE_INPUT;
665    case TGSI_FILE_OUTPUT:
666       return VGPU10_OPERAND_TYPE_OUTPUT;
667    case TGSI_FILE_TEMPORARY:
668       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
669                    : VGPU10_OPERAND_TYPE_TEMP;
670    case TGSI_FILE_IMMEDIATE:
671       /* all immediates are 32-bit values at this time so
672        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
673        */
674       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
675    case TGSI_FILE_SAMPLER:
676       return VGPU10_OPERAND_TYPE_SAMPLER;
677    case TGSI_FILE_SYSTEM_VALUE:
678       return VGPU10_OPERAND_TYPE_INPUT;
679 
680    /* XXX TODO more cases to finish */
681 
682    default:
683       assert(!"Bad tgsi register file!");
684       return VGPU10_OPERAND_TYPE_NULL;
685    }
686 }
687 
688 
689 /**
690  * Emit a null dst register
691  */
692 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)693 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
694 {
695    VGPU10OperandToken0 operand;
696 
697    operand.value = 0;
698    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
699    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
700 
701    emit_dword(emit, operand.value);
702 }
703 
704 
705 /**
706  * If the given register is a temporary, return the array ID.
707  * Else return zero.
708  */
709 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)710 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
711                   enum tgsi_file_type file, unsigned index)
712 {
713    if (file == TGSI_FILE_TEMPORARY) {
714       return emit->temp_map[index].arrayId;
715    }
716    else {
717       return 0;
718    }
719 }
720 
721 
722 /**
723  * If the given register is a temporary, convert the index from a TGSI
724  * TEMPORARY index to a VGPU10 temp index.
725  */
726 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)727 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
728                  enum tgsi_file_type file, unsigned index)
729 {
730    if (file == TGSI_FILE_TEMPORARY) {
731       return emit->temp_map[index].index;
732    }
733    else {
734       return index;
735    }
736 }
737 
738 
739 /**
740  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
741  * Note: the operandType field must already be initialized.
742  */
743 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,boolean indirect,boolean index2D,unsigned tempArrayID)744 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
745                         VGPU10OperandToken0 operand0,
746                         enum tgsi_file_type file,
747                         boolean indirect, boolean index2D,
748                         unsigned tempArrayID)
749 {
750    unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
751 
752    /*
753     * Compute index dimensions
754     */
755    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
756        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
757       /* there's no swizzle for in-line immediates */
758       indexDim = VGPU10_OPERAND_INDEX_0D;
759       assert(operand0.selectionMode == 0);
760    }
761    else {
762       if (index2D ||
763           tempArrayID > 0 ||
764           operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
765          indexDim = VGPU10_OPERAND_INDEX_2D;
766       }
767       else {
768          indexDim = VGPU10_OPERAND_INDEX_1D;
769       }
770    }
771 
772    /*
773     * Compute index representations (immediate, relative, etc).
774     */
775    if (tempArrayID > 0) {
776       assert(file == TGSI_FILE_TEMPORARY);
777       /* First index is the array ID, second index is the array element */
778       index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
779       if (indirect) {
780          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
781       }
782       else {
783          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
784       }
785    }
786    else if (indirect) {
787       if (file == TGSI_FILE_CONSTANT) {
788          /* index[0] indicates which constant buffer while index[1] indicates
789           * the position in the constant buffer.
790           */
791          index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
792          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
793       }
794       else {
795          /* All other register files are 1-dimensional */
796          index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
797       }
798    }
799    else {
800       index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
801       index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
802    }
803 
804    operand0.indexDimension = indexDim;
805    operand0.index0Representation = index0Rep;
806    operand0.index1Representation = index1Rep;
807 
808    return operand0;
809 }
810 
811 
812 /**
813  * Emit the operand for expressing an address register for indirect indexing.
814  * Note that the address register is really just a temp register.
815  * \param addr_reg_index  which address register to use
816  */
817 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)818 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
819                        unsigned addr_reg_index)
820 {
821    unsigned tmp_reg_index;
822    VGPU10OperandToken0 operand0;
823 
824    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
825 
826    tmp_reg_index = emit->address_reg_index[addr_reg_index];
827 
828    /* operand0 is a simple temporary register, selecting one component */
829    operand0.value = 0;
830    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
831    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
832    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
833    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
834    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
835    operand0.swizzleX = 0;
836    operand0.swizzleY = 1;
837    operand0.swizzleZ = 2;
838    operand0.swizzleW = 3;
839 
840    emit_dword(emit, operand0.value);
841    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
842 }
843 
844 
845 /**
846  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
847  * \param emit  the emitter context
848  * \param reg  the TGSI dst register to translate
849  */
850 static void
emit_dst_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * reg)851 emit_dst_register(struct svga_shader_emitter_v10 *emit,
852                   const struct tgsi_full_dst_register *reg)
853 {
854    enum tgsi_file_type file = reg->Register.File;
855    unsigned index = reg->Register.Index;
856    const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
857    const unsigned sem_index = emit->info.output_semantic_index[index];
858    unsigned writemask = reg->Register.WriteMask;
859    const unsigned indirect = reg->Register.Indirect;
860    const unsigned tempArrayId = get_temp_array_id(emit, file, index);
861    const unsigned index2d = reg->Register.Dimension;
862    VGPU10OperandToken0 operand0;
863 
864    if (file == TGSI_FILE_OUTPUT) {
865       if (emit->unit == PIPE_SHADER_VERTEX ||
866           emit->unit == PIPE_SHADER_GEOMETRY) {
867          if (index == emit->vposition.out_index &&
868              emit->vposition.tmp_index != INVALID_INDEX) {
869             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
870              * vertex position result in a temporary so that we can modify
871              * it in the post_helper() code.
872              */
873             file = TGSI_FILE_TEMPORARY;
874             index = emit->vposition.tmp_index;
875          }
876          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
877                   emit->clip_dist_tmp_index != INVALID_INDEX) {
878             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
879              * We store the clip distance in a temporary first, then
880              * we'll copy it to the shadow copy and to CLIPDIST with the
881              * enabled planes mask in emit_clip_distance_instructions().
882              */
883             file = TGSI_FILE_TEMPORARY;
884             index = emit->clip_dist_tmp_index + sem_index;
885          }
886          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
887                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
888             /* replace the CLIPVERTEX output register with a temporary */
889             assert(emit->clip_mode == CLIP_VERTEX);
890             assert(sem_index == 0);
891             file = TGSI_FILE_TEMPORARY;
892             index = emit->clip_vertex_tmp_index;
893          }
894       }
895       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
896          if (sem_name == TGSI_SEMANTIC_POSITION) {
897             /* Fragment depth output register */
898             operand0.value = 0;
899             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
900             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
901             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
902             emit_dword(emit, operand0.value);
903             return;
904          }
905          else if (index == emit->fs.color_out_index[0] &&
906              emit->fs.color_tmp_index != INVALID_INDEX) {
907             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
908              * fragment color result in a temporary so that we can read it
909              * it in the post_helper() code.
910              */
911             file = TGSI_FILE_TEMPORARY;
912             index = emit->fs.color_tmp_index;
913          }
914          else {
915             /* Typically, for fragment shaders, the output register index
916              * matches the color semantic index.  But not when we write to
917              * the fragment depth register.  In that case, OUT[0] will be
918              * fragdepth and OUT[1] will be the 0th color output.  We need
919              * to use the semantic index for color outputs.
920              */
921             assert(sem_name == TGSI_SEMANTIC_COLOR);
922             index = emit->info.output_semantic_index[index];
923 
924             emit->num_output_writes++;
925          }
926       }
927    }
928 
929    /* init operand tokens to all zero */
930    operand0.value = 0;
931 
932    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
933 
934    /* the operand has a writemask */
935    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
936 
937    /* Which of the four dest components to write to. Note that we can use a
938     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
939     */
940    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
941    operand0.mask = writemask;
942 
943    /* translate TGSI register file type to VGPU10 operand type */
944    operand0.operandType = translate_register_file(file, tempArrayId > 0);
945 
946    check_register_index(emit, operand0.operandType, index);
947 
948    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
949                                       index2d, tempArrayId);
950 
951    /* Emit tokens */
952    emit_dword(emit, operand0.value);
953    if (tempArrayId > 0) {
954       emit_dword(emit, tempArrayId);
955    }
956 
957    emit_dword(emit, remap_temp_index(emit, file, index));
958 
959    if (indirect) {
960       emit_indirect_register(emit, reg->Indirect.Index);
961    }
962 }
963 
964 
965 /**
966  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
967  */
968 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)969 emit_src_register(struct svga_shader_emitter_v10 *emit,
970                   const struct tgsi_full_src_register *reg)
971 {
972    enum tgsi_file_type file = reg->Register.File;
973    unsigned index = reg->Register.Index;
974    const unsigned indirect = reg->Register.Indirect;
975    const unsigned tempArrayId = get_temp_array_id(emit, file, index);
976    const unsigned index2d = reg->Register.Dimension;
977    const unsigned swizzleX = reg->Register.SwizzleX;
978    const unsigned swizzleY = reg->Register.SwizzleY;
979    const unsigned swizzleZ = reg->Register.SwizzleZ;
980    const unsigned swizzleW = reg->Register.SwizzleW;
981    const unsigned absolute = reg->Register.Absolute;
982    const unsigned negate = reg->Register.Negate;
983    bool is_prim_id = FALSE;
984 
985    VGPU10OperandToken0 operand0;
986    VGPU10OperandToken1 operand1;
987 
988    if (emit->unit == PIPE_SHADER_FRAGMENT &&
989       file == TGSI_FILE_INPUT) {
990       if (index == emit->fs.face_input_index) {
991          /* Replace INPUT[FACE] with TEMP[FACE] */
992          file = TGSI_FILE_TEMPORARY;
993          index = emit->fs.face_tmp_index;
994       }
995       else if (index == emit->fs.fragcoord_input_index) {
996          /* Replace INPUT[POSITION] with TEMP[POSITION] */
997          file = TGSI_FILE_TEMPORARY;
998          index = emit->fs.fragcoord_tmp_index;
999       }
1000       else {
1001          /* We remap fragment shader inputs to that FS input indexes
1002           * match up with VS/GS output indexes.
1003           */
1004          index = emit->linkage.input_map[index];
1005       }
1006    }
1007    else if (emit->unit == PIPE_SHADER_GEOMETRY &&
1008             file == TGSI_FILE_INPUT) {
1009       is_prim_id = (index == emit->gs.prim_id_index);
1010       index = emit->linkage.input_map[index];
1011    }
1012    else if (emit->unit == PIPE_SHADER_VERTEX) {
1013       if (file == TGSI_FILE_INPUT) {
1014          /* if input is adjusted... */
1015          if ((emit->key.vs.adjust_attrib_w_1 |
1016               emit->key.vs.adjust_attrib_itof |
1017               emit->key.vs.adjust_attrib_utof |
1018               emit->key.vs.attrib_is_bgra |
1019               emit->key.vs.attrib_puint_to_snorm |
1020               emit->key.vs.attrib_puint_to_uscaled |
1021               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1022             file = TGSI_FILE_TEMPORARY;
1023             index = emit->vs.adjusted_input[index];
1024          }
1025       }
1026       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1027          assert(index < ARRAY_SIZE(emit->system_value_indexes));
1028          index = emit->system_value_indexes[index];
1029       }
1030    }
1031 
1032    operand0.value = operand1.value = 0;
1033 
1034    if (is_prim_id) {
1035       /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but
1036        * our virtual GPU accepts this as-is.
1037        */
1038       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1039       operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1040    }
1041    else {
1042       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1043       operand0.operandType = translate_register_file(file, tempArrayId > 0);
1044    }
1045 
1046    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1047                                       index2d, tempArrayId);
1048 
1049    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1050        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1051       /* there's no swizzle for in-line immediates */
1052       if (swizzleX == swizzleY &&
1053           swizzleX == swizzleZ &&
1054           swizzleX == swizzleW) {
1055          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1056       }
1057       else {
1058          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1059       }
1060 
1061       operand0.swizzleX = swizzleX;
1062       operand0.swizzleY = swizzleY;
1063       operand0.swizzleZ = swizzleZ;
1064       operand0.swizzleW = swizzleW;
1065 
1066       if (absolute || negate) {
1067          operand0.extended = 1;
1068          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1069          if (absolute && !negate)
1070             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1071          if (!absolute && negate)
1072             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1073          if (absolute && negate)
1074             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1075       }
1076    }
1077 
1078    /* Emit the operand tokens */
1079    emit_dword(emit, operand0.value);
1080    if (operand0.extended)
1081       emit_dword(emit, operand1.value);
1082 
1083    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1084       /* Emit the four float/int in-line immediate values */
1085       unsigned *c;
1086       assert(index < ARRAY_SIZE(emit->immediates));
1087       assert(file == TGSI_FILE_IMMEDIATE);
1088       assert(swizzleX < 4);
1089       assert(swizzleY < 4);
1090       assert(swizzleZ < 4);
1091       assert(swizzleW < 4);
1092       c = (unsigned *) emit->immediates[index];
1093       emit_dword(emit, c[swizzleX]);
1094       emit_dword(emit, c[swizzleY]);
1095       emit_dword(emit, c[swizzleZ]);
1096       emit_dword(emit, c[swizzleW]);
1097    }
1098    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1099       /* Emit the register index(es) */
1100       if (index2d ||
1101           operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
1102          emit_dword(emit, reg->Dimension.Index);
1103       }
1104 
1105       if (tempArrayId > 0) {
1106          emit_dword(emit, tempArrayId);
1107       }
1108 
1109       emit_dword(emit, remap_temp_index(emit, file, index));
1110 
1111       if (indirect) {
1112          emit_indirect_register(emit, reg->Indirect.Index);
1113       }
1114    }
1115 }
1116 
1117 
1118 /**
1119  * Emit a resource operand (for use with a SAMPLE instruction).
1120  */
1121 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)1122 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1123                        unsigned resource_number)
1124 {
1125    VGPU10OperandToken0 operand0;
1126 
1127    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1128 
1129    /* init */
1130    operand0.value = 0;
1131 
1132    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1133    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1134    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1135    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1136    operand0.swizzleX = VGPU10_COMPONENT_X;
1137    operand0.swizzleY = VGPU10_COMPONENT_Y;
1138    operand0.swizzleZ = VGPU10_COMPONENT_Z;
1139    operand0.swizzleW = VGPU10_COMPONENT_W;
1140 
1141    emit_dword(emit, operand0.value);
1142    emit_dword(emit, resource_number);
1143 }
1144 
1145 
1146 /**
1147  * Emit a sampler operand (for use with a SAMPLE instruction).
1148  */
1149 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned sampler_number)1150 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1151                       unsigned sampler_number)
1152 {
1153    VGPU10OperandToken0 operand0;
1154 
1155    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1156 
1157    /* init */
1158    operand0.value = 0;
1159 
1160    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1161    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1162 
1163    emit_dword(emit, operand0.value);
1164    emit_dword(emit, sampler_number);
1165 }
1166 
1167 
1168 /**
1169  * Emit an operand which reads the IS_FRONT_FACING register.
1170  */
1171 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)1172 emit_face_register(struct svga_shader_emitter_v10 *emit)
1173 {
1174    VGPU10OperandToken0 operand0;
1175    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1176 
1177    /* init */
1178    operand0.value = 0;
1179 
1180    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1181    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1182    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1183    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1184 
1185    operand0.swizzleX = VGPU10_COMPONENT_X;
1186    operand0.swizzleY = VGPU10_COMPONENT_X;
1187    operand0.swizzleZ = VGPU10_COMPONENT_X;
1188    operand0.swizzleW = VGPU10_COMPONENT_X;
1189 
1190    emit_dword(emit, operand0.value);
1191    emit_dword(emit, index);
1192 }
1193 
1194 
1195 /**
1196  * Emit the token for a VGPU10 opcode.
1197  * \param saturate   clamp result to [0,1]?
1198  */
1199 static void
emit_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate)1200 emit_opcode(struct svga_shader_emitter_v10 *emit,
1201             unsigned vgpu10_opcode, boolean saturate)
1202 {
1203    VGPU10OpcodeToken0 token0;
1204 
1205    token0.value = 0;  /* init all fields to zero */
1206    token0.opcodeType = vgpu10_opcode;
1207    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1208    token0.saturate = saturate;
1209 
1210    emit_dword(emit, token0.value);
1211 }
1212 
1213 
1214 /**
1215  * Emit the token for a VGPU10 resinfo instruction.
1216  * \param modifier   return type modifier, _uint or _rcpFloat.
1217  *                   TODO: We may want to remove this parameter if it will
1218  *                   only ever be used as _uint.
1219  */
1220 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)1221 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
1222                     VGPU10_RESINFO_RETURN_TYPE modifier)
1223 {
1224    VGPU10OpcodeToken0 token0;
1225 
1226    token0.value = 0;  /* init all fields to zero */
1227    token0.opcodeType = VGPU10_OPCODE_RESINFO;
1228    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1229    token0.resinfoReturnType = modifier;
1230 
1231    emit_dword(emit, token0.value);
1232 }
1233 
1234 
1235 /**
1236  * Emit opcode tokens for a texture sample instruction.  Texture instructions
1237  * can be rather complicated (texel offsets, etc) so we have this specialized
1238  * function.
1239  */
1240 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,const int offsets[3])1241 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
1242                    unsigned vgpu10_opcode, boolean saturate,
1243                    const int offsets[3])
1244 {
1245    VGPU10OpcodeToken0 token0;
1246    VGPU10OpcodeToken1 token1;
1247 
1248    token0.value = 0;  /* init all fields to zero */
1249    token0.opcodeType = vgpu10_opcode;
1250    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1251    token0.saturate = saturate;
1252 
1253    if (offsets[0] || offsets[1] || offsets[2]) {
1254       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1255       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1256       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1257       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1258       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1259       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1260 
1261       token0.extended = 1;
1262       token1.value = 0;
1263       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
1264       token1.offsetU = offsets[0];
1265       token1.offsetV = offsets[1];
1266       token1.offsetW = offsets[2];
1267    }
1268 
1269    emit_dword(emit, token0.value);
1270    if (token0.extended) {
1271       emit_dword(emit, token1.value);
1272    }
1273 }
1274 
1275 
1276 /**
1277  * Emit a DISCARD opcode token.
1278  * If nonzero is set, we'll discard the fragment if the X component is not 0.
1279  * Otherwise, we'll discard the fragment if the X component is 0.
1280  */
1281 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,boolean nonzero)1282 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
1283 {
1284    VGPU10OpcodeToken0 opcode0;
1285 
1286    opcode0.value = 0;
1287    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
1288    if (nonzero)
1289       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
1290 
1291    emit_dword(emit, opcode0.value);
1292 }
1293 
1294 
1295 /**
1296  * We need to call this before we begin emitting a VGPU10 instruction.
1297  */
1298 static void
begin_emit_instruction(struct svga_shader_emitter_v10 * emit)1299 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
1300 {
1301    assert(emit->inst_start_token == 0);
1302    /* Save location of the instruction's VGPU10OpcodeToken0 token.
1303     * Note, we can't save a pointer because it would become invalid if
1304     * we have to realloc the output buffer.
1305     */
1306    emit->inst_start_token = emit_get_num_tokens(emit);
1307 }
1308 
1309 
1310 /**
1311  * We need to call this after we emit the last token of a VGPU10 instruction.
1312  * This function patches in the opcode token's instructionLength field.
1313  */
1314 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)1315 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
1316 {
1317    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
1318    unsigned inst_length;
1319 
1320    assert(emit->inst_start_token > 0);
1321 
1322    if (emit->discard_instruction) {
1323       /* Back up the emit->ptr to where this instruction started so
1324        * that we discard the current instruction.
1325        */
1326       emit->ptr = (char *) (tokens + emit->inst_start_token);
1327    }
1328    else {
1329       /* Compute instruction length and patch that into the start of
1330        * the instruction.
1331        */
1332       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
1333 
1334       assert(inst_length > 0);
1335 
1336       tokens[emit->inst_start_token].instructionLength = inst_length;
1337    }
1338 
1339    emit->inst_start_token = 0; /* reset to zero for error checking */
1340    emit->discard_instruction = FALSE;
1341 }
1342 
1343 
1344 /**
1345  * Return index for a free temporary register.
1346  */
1347 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)1348 get_temp_index(struct svga_shader_emitter_v10 *emit)
1349 {
1350    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
1351    return emit->num_shader_temps + emit->internal_temp_count++;
1352 }
1353 
1354 
1355 /**
1356  * Release the temporaries which were generated by get_temp_index().
1357  */
1358 static void
free_temp_indexes(struct svga_shader_emitter_v10 * emit)1359 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
1360 {
1361    emit->internal_temp_count = 0;
1362 }
1363 
1364 
1365 /**
1366  * Create a tgsi_full_src_register.
1367  */
1368 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)1369 make_src_reg(enum tgsi_file_type file, unsigned index)
1370 {
1371    struct tgsi_full_src_register reg;
1372 
1373    memset(&reg, 0, sizeof(reg));
1374    reg.Register.File = file;
1375    reg.Register.Index = index;
1376    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
1377    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
1378    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1379    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
1380    return reg;
1381 }
1382 
1383 
1384 /**
1385  * Create a tgsi_full_src_register for a temporary.
1386  */
1387 static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)1388 make_src_temp_reg(unsigned index)
1389 {
1390    return make_src_reg(TGSI_FILE_TEMPORARY, index);
1391 }
1392 
1393 
1394 /**
1395  * Create a tgsi_full_src_register for a constant.
1396  */
1397 static struct tgsi_full_src_register
make_src_const_reg(unsigned index)1398 make_src_const_reg(unsigned index)
1399 {
1400    return make_src_reg(TGSI_FILE_CONSTANT, index);
1401 }
1402 
1403 
1404 /**
1405  * Create a tgsi_full_src_register for an immediate constant.
1406  */
1407 static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)1408 make_src_immediate_reg(unsigned index)
1409 {
1410    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
1411 }
1412 
1413 
1414 /**
1415  * Create a tgsi_full_dst_register.
1416  */
1417 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)1418 make_dst_reg(enum tgsi_file_type file, unsigned index)
1419 {
1420    struct tgsi_full_dst_register reg;
1421 
1422    memset(&reg, 0, sizeof(reg));
1423    reg.Register.File = file;
1424    reg.Register.Index = index;
1425    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1426    return reg;
1427 }
1428 
1429 
1430 /**
1431  * Create a tgsi_full_dst_register for a temporary.
1432  */
1433 static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)1434 make_dst_temp_reg(unsigned index)
1435 {
1436    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
1437 }
1438 
1439 
1440 /**
1441  * Create a tgsi_full_dst_register for an output.
1442  */
1443 static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)1444 make_dst_output_reg(unsigned index)
1445 {
1446    return make_dst_reg(TGSI_FILE_OUTPUT, index);
1447 }
1448 
1449 
1450 /**
1451  * Create negated tgsi_full_src_register.
1452  */
1453 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)1454 negate_src(const struct tgsi_full_src_register *reg)
1455 {
1456    struct tgsi_full_src_register neg = *reg;
1457    neg.Register.Negate = !reg->Register.Negate;
1458    return neg;
1459 }
1460 
1461 /**
1462  * Create absolute value of a tgsi_full_src_register.
1463  */
1464 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)1465 absolute_src(const struct tgsi_full_src_register *reg)
1466 {
1467    struct tgsi_full_src_register absolute = *reg;
1468    absolute.Register.Absolute = 1;
1469    return absolute;
1470 }
1471 
1472 
1473 /** Return the named swizzle term from the src register */
1474 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)1475 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
1476 {
1477    switch (term) {
1478    case TGSI_SWIZZLE_X:
1479       return reg->Register.SwizzleX;
1480    case TGSI_SWIZZLE_Y:
1481       return reg->Register.SwizzleY;
1482    case TGSI_SWIZZLE_Z:
1483       return reg->Register.SwizzleZ;
1484    case TGSI_SWIZZLE_W:
1485       return reg->Register.SwizzleW;
1486    default:
1487       assert(!"Bad swizzle");
1488       return TGSI_SWIZZLE_X;
1489    }
1490 }
1491 
1492 
1493 /**
1494  * Create swizzled tgsi_full_src_register.
1495  */
1496 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)1497 swizzle_src(const struct tgsi_full_src_register *reg,
1498             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
1499             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
1500 {
1501    struct tgsi_full_src_register swizzled = *reg;
1502    /* Note: we swizzle the current swizzle */
1503    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
1504    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
1505    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
1506    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
1507    return swizzled;
1508 }
1509 
1510 
1511 /**
1512  * Create swizzled tgsi_full_src_register where all the swizzle
1513  * terms are the same.
1514  */
1515 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)1516 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
1517 {
1518    struct tgsi_full_src_register swizzled = *reg;
1519    /* Note: we swizzle the current swizzle */
1520    swizzled.Register.SwizzleX =
1521    swizzled.Register.SwizzleY =
1522    swizzled.Register.SwizzleZ =
1523    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
1524    return swizzled;
1525 }
1526 
1527 
1528 /**
1529  * Create new tgsi_full_dst_register with writemask.
1530  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
1531  */
1532 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)1533 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
1534 {
1535    struct tgsi_full_dst_register masked = *reg;
1536    masked.Register.WriteMask = mask;
1537    return masked;
1538 }
1539 
1540 
1541 /**
1542  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
1543  */
1544 static boolean
same_swizzle_terms(const struct tgsi_full_src_register * reg)1545 same_swizzle_terms(const struct tgsi_full_src_register *reg)
1546 {
1547    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
1548            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
1549            reg->Register.SwizzleZ == reg->Register.SwizzleW);
1550 }
1551 
1552 
1553 /**
1554  * Search the vector for the value 'x' and return its position.
1555  */
1556 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)1557 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
1558                  union tgsi_immediate_data x)
1559 {
1560    unsigned i;
1561    for (i = 0; i < 4; i++) {
1562       if (vec[i].Int == x.Int)
1563          return i;
1564    }
1565    return -1;
1566 }
1567 
1568 
1569 /**
1570  * Helper used by make_immediate_reg(), make_immediate_reg_4().
1571  */
1572 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)1573 find_immediate(struct svga_shader_emitter_v10 *emit,
1574                union tgsi_immediate_data x, unsigned startIndex)
1575 {
1576    const unsigned endIndex = emit->num_immediates;
1577    unsigned i;
1578 
1579    assert(emit->immediates_emitted);
1580 
1581    /* Search immediates for x, y, z, w */
1582    for (i = startIndex; i < endIndex; i++) {
1583       if (x.Int == emit->immediates[i][0].Int ||
1584           x.Int == emit->immediates[i][1].Int ||
1585           x.Int == emit->immediates[i][2].Int ||
1586           x.Int == emit->immediates[i][3].Int) {
1587          return i;
1588       }
1589    }
1590    /* Should never try to use an immediate value that wasn't pre-declared */
1591    assert(!"find_immediate() failed!");
1592    return -1;
1593 }
1594 
1595 
1596 /**
1597  * Return a tgsi_full_src_register for an immediate/literal
1598  * union tgsi_immediate_data[4] value.
1599  * Note: the values must have been previously declared/allocated in
1600  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
1601  * vec4 immediate.
1602  */
1603 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])1604 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
1605                      const union tgsi_immediate_data imm[4])
1606 {
1607    struct tgsi_full_src_register reg;
1608    unsigned i;
1609 
1610    for (i = 0; i < emit->num_common_immediates; i++) {
1611       /* search for first component value */
1612       int immpos = find_immediate(emit, imm[0], i);
1613       int x, y, z, w;
1614 
1615       assert(immpos >= 0);
1616 
1617       /* find remaining components within the immediate vector */
1618       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
1619       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
1620       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
1621       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
1622 
1623       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
1624          /* found them all */
1625          memset(&reg, 0, sizeof(reg));
1626          reg.Register.File = TGSI_FILE_IMMEDIATE;
1627          reg.Register.Index = immpos;
1628          reg.Register.SwizzleX = x;
1629          reg.Register.SwizzleY = y;
1630          reg.Register.SwizzleZ = z;
1631          reg.Register.SwizzleW = w;
1632          return reg;
1633       }
1634       /* else, keep searching */
1635    }
1636 
1637    assert(!"Failed to find immediate register!");
1638 
1639    /* Just return IMM[0].xxxx */
1640    memset(&reg, 0, sizeof(reg));
1641    reg.Register.File = TGSI_FILE_IMMEDIATE;
1642    return reg;
1643 }
1644 
1645 
1646 /**
1647  * Return a tgsi_full_src_register for an immediate/literal
1648  * union tgsi_immediate_data value of the form {value, value, value, value}.
1649  * \sa make_immediate_reg_4() regarding allowed values.
1650  */
1651 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)1652 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
1653                    union tgsi_immediate_data value)
1654 {
1655    struct tgsi_full_src_register reg;
1656    int immpos = find_immediate(emit, value, 0);
1657 
1658    assert(immpos >= 0);
1659 
1660    memset(&reg, 0, sizeof(reg));
1661    reg.Register.File = TGSI_FILE_IMMEDIATE;
1662    reg.Register.Index = immpos;
1663    reg.Register.SwizzleX =
1664    reg.Register.SwizzleY =
1665    reg.Register.SwizzleZ =
1666    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
1667 
1668    return reg;
1669 }
1670 
1671 
1672 /**
1673  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
1674  * \sa make_immediate_reg_4() regarding allowed values.
1675  */
1676 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)1677 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
1678                           float x, float y, float z, float w)
1679 {
1680    union tgsi_immediate_data imm[4];
1681    imm[0].Float = x;
1682    imm[1].Float = y;
1683    imm[2].Float = z;
1684    imm[3].Float = w;
1685    return make_immediate_reg_4(emit, imm);
1686 }
1687 
1688 
1689 /**
1690  * Return a tgsi_full_src_register for an immediate/literal float value
1691  * of the form {value, value, value, value}.
1692  * \sa make_immediate_reg_4() regarding allowed values.
1693  */
1694 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)1695 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
1696 {
1697    union tgsi_immediate_data imm;
1698    imm.Float = value;
1699    return make_immediate_reg(emit, imm);
1700 }
1701 
1702 
1703 /**
1704  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
1705  */
1706 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)1707 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
1708                         int x, int y, int z, int w)
1709 {
1710    union tgsi_immediate_data imm[4];
1711    imm[0].Int = x;
1712    imm[1].Int = y;
1713    imm[2].Int = z;
1714    imm[3].Int = w;
1715    return make_immediate_reg_4(emit, imm);
1716 }
1717 
1718 
1719 /**
1720  * Return a tgsi_full_src_register for an immediate/literal int value
1721  * of the form {value, value, value, value}.
1722  * \sa make_immediate_reg_4() regarding allowed values.
1723  */
1724 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)1725 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
1726 {
1727    union tgsi_immediate_data imm;
1728    imm.Int = value;
1729    return make_immediate_reg(emit, imm);
1730 }
1731 
1732 
1733 /**
1734  * Allocate space for a union tgsi_immediate_data[4] immediate.
1735  * \return  the index/position of the immediate.
1736  */
1737 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])1738 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
1739                   const union tgsi_immediate_data imm[4])
1740 {
1741    unsigned n = emit->num_immediates++;
1742    assert(!emit->immediates_emitted);
1743    assert(n < ARRAY_SIZE(emit->immediates));
1744    emit->immediates[n][0] = imm[0];
1745    emit->immediates[n][1] = imm[1];
1746    emit->immediates[n][2] = imm[2];
1747    emit->immediates[n][3] = imm[3];
1748    return n;
1749 }
1750 
1751 
1752 /**
1753  * Allocate space for a float[4] immediate.
1754  * \return  the index/position of the immediate.
1755  */
1756 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)1757 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
1758                        float x, float y, float z, float w)
1759 {
1760    union tgsi_immediate_data imm[4];
1761    imm[0].Float = x;
1762    imm[1].Float = y;
1763    imm[2].Float = z;
1764    imm[3].Float = w;
1765    return alloc_immediate_4(emit, imm);
1766 }
1767 
1768 
1769 /**
1770  * Allocate space for an int[4] immediate.
1771  * \return  the index/position of the immediate.
1772  */
1773 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)1774 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
1775                        int x, int y, int z, int w)
1776 {
1777    union tgsi_immediate_data imm[4];
1778    imm[0].Int = x;
1779    imm[1].Int = y;
1780    imm[2].Int = z;
1781    imm[3].Int = w;
1782    return alloc_immediate_4(emit, imm);
1783 }
1784 
1785 
1786 /**
1787  * Allocate a shader input to store a system value.
1788  */
1789 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)1790 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
1791 {
1792    const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
1793    assert(index < ARRAY_SIZE(emit->system_value_indexes));
1794    emit->system_value_indexes[index] = n;
1795    return n;
1796 }
1797 
1798 
1799 /**
1800  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
1801  */
1802 static boolean
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)1803 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
1804                       const struct tgsi_full_immediate *imm)
1805 {
1806    /* We don't actually emit any code here.  We just save the
1807     * immediate values and emit them later.
1808     */
1809    alloc_immediate_4(emit, imm->u);
1810    return TRUE;
1811 }
1812 
1813 
1814 /**
1815  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
1816  * containing all the immediate values previously allocated
1817  * with alloc_immediate_4().
1818  */
1819 static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)1820 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
1821 {
1822    VGPU10OpcodeToken0 token;
1823 
1824    assert(!emit->immediates_emitted);
1825 
1826    token.value = 0;
1827    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
1828    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
1829 
1830    /* Note: no begin/end_emit_instruction() calls */
1831    emit_dword(emit, token.value);
1832    emit_dword(emit, 2 + 4 * emit->num_immediates);
1833    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
1834 
1835    emit->immediates_emitted = TRUE;
1836 
1837    return TRUE;
1838 }
1839 
1840 
1841 /**
1842  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
1843  * interpolation mode.
1844  * \return a VGPU10_INTERPOLATION_x value
1845  */
1846 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)1847 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
1848                         enum tgsi_interpolate_mode interp,
1849                         enum tgsi_interpolate_loc interpolate_loc)
1850 {
1851    if (interp == TGSI_INTERPOLATE_COLOR) {
1852       interp = emit->key.fs.flatshade ?
1853          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
1854    }
1855 
1856    switch (interp) {
1857    case TGSI_INTERPOLATE_CONSTANT:
1858       return VGPU10_INTERPOLATION_CONSTANT;
1859    case TGSI_INTERPOLATE_LINEAR:
1860       return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
1861              VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
1862              VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
1863    case TGSI_INTERPOLATE_PERSPECTIVE:
1864       return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
1865              VGPU10_INTERPOLATION_LINEAR_CENTROID :
1866              VGPU10_INTERPOLATION_LINEAR;
1867    default:
1868       assert(!"Unexpected interpolation mode");
1869       return VGPU10_INTERPOLATION_CONSTANT;
1870    }
1871 }
1872 
1873 
1874 /**
1875  * Translate a TGSI property to VGPU10.
1876  * Don't emit any instructions yet, only need to gather the primitive property
1877  * information.  The output primitive topology might be changed later. The
1878  * final property instructions will be emitted as part of the pre-helper code.
1879  */
1880 static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_property * prop)1881 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
1882                      const struct tgsi_full_property *prop)
1883 {
1884    static const VGPU10_PRIMITIVE primType[] = {
1885       VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
1886       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
1887       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
1888       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
1889       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
1890       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
1891       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
1892       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
1893       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
1894       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
1895       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
1896       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
1897       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
1898       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
1899    };
1900 
1901    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
1902       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
1903       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
1904       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
1905       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
1906       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
1907       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
1908       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
1909       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
1910       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
1911       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
1912       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
1913       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
1914       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
1915       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
1916    };
1917 
1918    static const unsigned inputArraySize[] = {
1919       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
1920       1,       /* VGPU10_PRIMITIVE_POINT */
1921       2,       /* VGPU10_PRIMITIVE_LINE */
1922       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
1923       0,
1924       0,
1925       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
1926       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
1927    };
1928 
1929    switch (prop->Property.PropertyName) {
1930    case TGSI_PROPERTY_GS_INPUT_PRIM:
1931       assert(prop->u[0].Data < ARRAY_SIZE(primType));
1932       emit->gs.prim_type = primType[prop->u[0].Data];
1933       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
1934       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
1935       break;
1936 
1937    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1938       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
1939       emit->gs.prim_topology = primTopology[prop->u[0].Data];
1940       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
1941       break;
1942 
1943    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1944       emit->gs.max_out_vertices = prop->u[0].Data;
1945       break;
1946 
1947    default:
1948       break;
1949    }
1950 
1951    return TRUE;
1952 }
1953 
1954 
1955 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)1956 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
1957                           VGPU10OpcodeToken0 opcode0, unsigned nData,
1958                           unsigned data)
1959 {
1960    begin_emit_instruction(emit);
1961    emit_dword(emit, opcode0.value);
1962    if (nData)
1963       emit_dword(emit, data);
1964    end_emit_instruction(emit);
1965 }
1966 
1967 
1968 /**
1969  * Emit property instructions
1970  */
1971 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)1972 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
1973 {
1974    VGPU10OpcodeToken0 opcode0;
1975 
1976    assert(emit->unit == PIPE_SHADER_GEOMETRY);
1977 
1978    /* emit input primitive type declaration */
1979    opcode0.value = 0;
1980    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
1981    opcode0.primitive = emit->gs.prim_type;
1982    emit_property_instruction(emit, opcode0, 0, 0);
1983 
1984    /* emit output primitive topology declaration */
1985    opcode0.value = 0;
1986    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
1987    opcode0.primitiveTopology = emit->gs.prim_topology;
1988    emit_property_instruction(emit, opcode0, 0, 0);
1989 
1990    /* emit max output vertices */
1991    opcode0.value = 0;
1992    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
1993    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
1994 }
1995 
1996 
1997 /**
1998  * Emit a vgpu10 declaration "instruction".
1999  * \param index  the register index
2000  * \param size   array size of the operand. In most cases, it is 1,
2001  *               but for inputs to geometry shader, the array size varies
2002  *               depending on the primitive type.
2003  */
2004 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)2005 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
2006                       VGPU10OpcodeToken0 opcode0,
2007                       VGPU10OperandToken0 operand0,
2008                       VGPU10NameToken name_token,
2009                       unsigned index, unsigned size)
2010 {
2011    assert(opcode0.opcodeType);
2012    assert(operand0.mask);
2013 
2014    begin_emit_instruction(emit);
2015    emit_dword(emit, opcode0.value);
2016 
2017    emit_dword(emit, operand0.value);
2018 
2019    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
2020       /* Next token is the index of the register to declare */
2021       emit_dword(emit, index);
2022    }
2023    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
2024       /* Next token is the size of the register */
2025       emit_dword(emit, size);
2026 
2027       /* Followed by the index of the register */
2028       emit_dword(emit, index);
2029    }
2030 
2031    if (name_token.value) {
2032       emit_dword(emit, name_token.value);
2033    }
2034 
2035    end_emit_instruction(emit);
2036 }
2037 
2038 
2039 /**
2040  * Emit the declaration for a shader input.
2041  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
2042  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
2043  * \param dim         index dimension
2044  * \param index       the input register index
2045  * \param size        array size of the operand. In most cases, it is 1,
2046  *                    but for inputs to geometry shader, the array size varies
2047  *                    depending on the primitive type.
2048  * \param name        one of VGPU10_NAME_x
2049  * \parma numComp     number of components
2050  * \param selMode     component selection mode
2051  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
2052  * \param interpMode  interpolation mode
2053  */
2054 static void
emit_input_declaration(struct svga_shader_emitter_v10 * emit,unsigned opcodeType,unsigned operandType,unsigned dim,unsigned index,unsigned size,unsigned name,unsigned numComp,unsigned selMode,unsigned usageMask,unsigned interpMode)2055 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
2056                        unsigned opcodeType, unsigned operandType,
2057                        unsigned dim, unsigned index, unsigned size,
2058                        unsigned name, unsigned numComp,
2059                        unsigned selMode, unsigned usageMask,
2060                        unsigned interpMode)
2061 {
2062    VGPU10OpcodeToken0 opcode0;
2063    VGPU10OperandToken0 operand0;
2064    VGPU10NameToken name_token;
2065 
2066    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2067    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
2068           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
2069           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
2070           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
2071    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
2072           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
2073    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
2074    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
2075    assert(dim <= VGPU10_OPERAND_INDEX_3D);
2076    assert(name == VGPU10_NAME_UNDEFINED ||
2077           name == VGPU10_NAME_POSITION ||
2078           name == VGPU10_NAME_INSTANCE_ID ||
2079           name == VGPU10_NAME_VERTEX_ID ||
2080           name == VGPU10_NAME_PRIMITIVE_ID ||
2081           name == VGPU10_NAME_IS_FRONT_FACE);
2082    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
2083           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
2084           interpMode == VGPU10_INTERPOLATION_LINEAR ||
2085           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
2086           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
2087           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
2088 
2089    check_register_index(emit, opcodeType, index);
2090 
2091    opcode0.value = operand0.value = name_token.value = 0;
2092 
2093    opcode0.opcodeType = opcodeType;
2094    opcode0.interpolationMode = interpMode;
2095 
2096    operand0.operandType = operandType;
2097    operand0.numComponents = numComp;
2098    operand0.selectionMode = selMode;
2099    operand0.mask = usageMask;
2100    operand0.indexDimension = dim;
2101    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2102    if (dim == VGPU10_OPERAND_INDEX_2D)
2103       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2104 
2105    name_token.name = name;
2106 
2107    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
2108 }
2109 
2110 
2111 /**
2112  * Emit the declaration for a shader output.
2113  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
2114  * \param index  the output register index
2115  * \param name  one of VGPU10_NAME_x
2116  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
2117  */
2118 static void
emit_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned type,unsigned index,unsigned name,unsigned usageMask)2119 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
2120                         unsigned type, unsigned index,
2121                         unsigned name, unsigned usageMask)
2122 {
2123    VGPU10OpcodeToken0 opcode0;
2124    VGPU10OperandToken0 operand0;
2125    VGPU10NameToken name_token;
2126 
2127    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2128    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
2129           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
2130           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
2131    assert(name == VGPU10_NAME_UNDEFINED ||
2132           name == VGPU10_NAME_POSITION ||
2133           name == VGPU10_NAME_PRIMITIVE_ID ||
2134           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
2135           name == VGPU10_NAME_CLIP_DISTANCE);
2136 
2137    check_register_index(emit, type, index);
2138 
2139    opcode0.value = operand0.value = name_token.value = 0;
2140 
2141    opcode0.opcodeType = type;
2142    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
2143    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2144    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
2145    operand0.mask = usageMask;
2146    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2147    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2148 
2149    name_token.name = name;
2150 
2151    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
2152 }
2153 
2154 
2155 /**
2156  * Emit the declaration for the fragment depth output.
2157  */
2158 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)2159 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
2160 {
2161    VGPU10OpcodeToken0 opcode0;
2162    VGPU10OperandToken0 operand0;
2163    VGPU10NameToken name_token;
2164 
2165    assert(emit->unit == PIPE_SHADER_FRAGMENT);
2166 
2167    opcode0.value = operand0.value = name_token.value = 0;
2168 
2169    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
2170    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
2171    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
2172    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2173    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
2174 
2175    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
2176 }
2177 
2178 
2179 /**
2180  * Emit the declaration for a system value input/output.
2181  */
2182 static void
emit_system_value_declaration(struct svga_shader_emitter_v10 * emit,enum tgsi_semantic semantic_name,unsigned index)2183 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
2184                               enum tgsi_semantic semantic_name, unsigned index)
2185 {
2186    switch (semantic_name) {
2187    case TGSI_SEMANTIC_INSTANCEID:
2188       index = alloc_system_value_index(emit, index);
2189       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
2190                              VGPU10_OPERAND_TYPE_INPUT,
2191                              VGPU10_OPERAND_INDEX_1D,
2192                              index, 1,
2193                              VGPU10_NAME_INSTANCE_ID,
2194                              VGPU10_OPERAND_4_COMPONENT,
2195                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2196                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
2197                              VGPU10_INTERPOLATION_UNDEFINED);
2198       break;
2199    case TGSI_SEMANTIC_VERTEXID:
2200       index = alloc_system_value_index(emit, index);
2201       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
2202                              VGPU10_OPERAND_TYPE_INPUT,
2203                              VGPU10_OPERAND_INDEX_1D,
2204                              index, 1,
2205                              VGPU10_NAME_VERTEX_ID,
2206                              VGPU10_OPERAND_4_COMPONENT,
2207                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2208                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
2209                              VGPU10_INTERPOLATION_UNDEFINED);
2210       break;
2211    default:
2212       ; /* XXX */
2213    }
2214 }
2215 
2216 /**
2217  * Translate a TGSI declaration to VGPU10.
2218  */
2219 static boolean
emit_vgpu10_declaration(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_declaration * decl)2220 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
2221                         const struct tgsi_full_declaration *decl)
2222 {
2223    switch (decl->Declaration.File) {
2224    case TGSI_FILE_INPUT:
2225       /* do nothing - see emit_input_declarations() */
2226       return TRUE;
2227 
2228    case TGSI_FILE_OUTPUT:
2229       assert(decl->Range.First == decl->Range.Last);
2230       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
2231       return TRUE;
2232 
2233    case TGSI_FILE_TEMPORARY:
2234       /* Don't declare the temps here.  Just keep track of how many
2235        * and emit the declaration later.
2236        */
2237       if (decl->Declaration.Array) {
2238          /* Indexed temporary array.  Save the start index of the array
2239           * and the size of the array.
2240           */
2241          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
2242          unsigned i;
2243 
2244          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
2245 
2246          /* Save this array so we can emit the declaration for it later */
2247          emit->temp_arrays[arrayID].start = decl->Range.First;
2248          emit->temp_arrays[arrayID].size =
2249             decl->Range.Last - decl->Range.First + 1;
2250 
2251          emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
2252          assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
2253          emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
2254 
2255          /* Fill in the temp_map entries for this array */
2256          for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2257             emit->temp_map[i].arrayId = arrayID;
2258             emit->temp_map[i].index = i - decl->Range.First;
2259          }
2260       }
2261 
2262       /* for all temps, indexed or not, keep track of highest index */
2263       emit->num_shader_temps = MAX2(emit->num_shader_temps,
2264                                     decl->Range.Last + 1);
2265       return TRUE;
2266 
2267    case TGSI_FILE_CONSTANT:
2268       /* Don't declare constants here.  Just keep track and emit later. */
2269       {
2270          unsigned constbuf = 0, num_consts;
2271          if (decl->Declaration.Dimension) {
2272             constbuf = decl->Dim.Index2D;
2273          }
2274          /* We throw an assertion here when, in fact, the shader should never
2275           * have linked due to constbuf index out of bounds, so we shouldn't
2276           * have reached here.
2277           */
2278          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
2279 
2280          num_consts = MAX2(emit->num_shader_consts[constbuf],
2281                            decl->Range.Last + 1);
2282 
2283          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
2284             debug_printf("Warning: constant buffer is declared to size [%u]"
2285                          " but [%u] is the limit.\n",
2286                          num_consts,
2287                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
2288          }
2289          /* The linker doesn't enforce the max UBO size so we clamp here */
2290          emit->num_shader_consts[constbuf] =
2291             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
2292       }
2293       return TRUE;
2294 
2295    case TGSI_FILE_IMMEDIATE:
2296       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
2297       return FALSE;
2298 
2299    case TGSI_FILE_SYSTEM_VALUE:
2300       emit_system_value_declaration(emit, decl->Semantic.Name,
2301                                     decl->Range.First);
2302       return TRUE;
2303 
2304    case TGSI_FILE_SAMPLER:
2305       /* Don't declare samplers here.  Just keep track and emit later. */
2306       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
2307       return TRUE;
2308 
2309 #if 0
2310    case TGSI_FILE_RESOURCE:
2311       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
2312       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
2313       assert(!"TGSI_FILE_RESOURCE not handled yet");
2314       return FALSE;
2315 #endif
2316 
2317    case TGSI_FILE_ADDRESS:
2318       emit->num_address_regs = MAX2(emit->num_address_regs,
2319                                     decl->Range.Last + 1);
2320       return TRUE;
2321 
2322    case TGSI_FILE_SAMPLER_VIEW:
2323       {
2324          unsigned unit = decl->Range.First;
2325          assert(decl->Range.First == decl->Range.Last);
2326          emit->sampler_target[unit] = decl->SamplerView.Resource;
2327          /* Note: we can ignore YZW return types for now */
2328          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
2329          emit->sampler_view[unit] = TRUE;
2330       }
2331       return TRUE;
2332 
2333    default:
2334       assert(!"Unexpected type of declaration");
2335       return FALSE;
2336    }
2337 }
2338 
2339 
2340 
2341 /**
2342  * Emit all input declarations.
2343  */
2344 static boolean
emit_input_declarations(struct svga_shader_emitter_v10 * emit)2345 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
2346 {
2347    unsigned i;
2348 
2349    if (emit->unit == PIPE_SHADER_FRAGMENT) {
2350 
2351       for (i = 0; i < emit->linkage.num_inputs; i++) {
2352          enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
2353          unsigned usage_mask = emit->info.input_usage_mask[i];
2354          unsigned index = emit->linkage.input_map[i];
2355          unsigned type, interpolationMode, name;
2356 
2357          if (usage_mask == 0)
2358             continue;  /* register is not actually used */
2359 
2360          if (semantic_name == TGSI_SEMANTIC_POSITION) {
2361             /* fragment position input */
2362             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2363             interpolationMode = VGPU10_INTERPOLATION_LINEAR;
2364             name = VGPU10_NAME_POSITION;
2365             if (usage_mask & TGSI_WRITEMASK_W) {
2366                /* we need to replace use of 'w' with '1/w' */
2367                emit->fs.fragcoord_input_index = i;
2368             }
2369          }
2370          else if (semantic_name == TGSI_SEMANTIC_FACE) {
2371             /* fragment front-facing input */
2372             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2373             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
2374             name = VGPU10_NAME_IS_FRONT_FACE;
2375             emit->fs.face_input_index = i;
2376          }
2377          else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
2378             /* primitive ID */
2379             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2380             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
2381             name = VGPU10_NAME_PRIMITIVE_ID;
2382          }
2383          else {
2384             /* general fragment input */
2385             type = VGPU10_OPCODE_DCL_INPUT_PS;
2386             interpolationMode =
2387                translate_interpolation(emit,
2388                                        emit->info.input_interpolate[i],
2389                                        emit->info.input_interpolate_loc[i]);
2390 
2391             /* keeps track if flat interpolation mode is being used */
2392             emit->uses_flat_interp = emit->uses_flat_interp ||
2393                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
2394 
2395             name = VGPU10_NAME_UNDEFINED;
2396          }
2397 
2398          emit_input_declaration(emit, type,
2399                                 VGPU10_OPERAND_TYPE_INPUT,
2400                                 VGPU10_OPERAND_INDEX_1D, index, 1,
2401                                 name,
2402                                 VGPU10_OPERAND_4_COMPONENT,
2403                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2404                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2405                                 interpolationMode);
2406       }
2407    }
2408    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
2409 
2410       for (i = 0; i < emit->info.num_inputs; i++) {
2411          enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
2412          unsigned usage_mask = emit->info.input_usage_mask[i];
2413          unsigned index = emit->linkage.input_map[i];
2414          unsigned opcodeType, operandType;
2415          unsigned numComp, selMode;
2416          unsigned name;
2417          unsigned dim;
2418 
2419          if (usage_mask == 0)
2420             continue;  /* register is not actually used */
2421 
2422          opcodeType = VGPU10_OPCODE_DCL_INPUT;
2423          operandType = VGPU10_OPERAND_TYPE_INPUT;
2424          numComp = VGPU10_OPERAND_4_COMPONENT;
2425          selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
2426          name = VGPU10_NAME_UNDEFINED;
2427 
2428          /* all geometry shader inputs are two dimensional except
2429           * gl_PrimitiveID
2430           */
2431          dim = VGPU10_OPERAND_INDEX_2D;
2432 
2433          if (semantic_name == TGSI_SEMANTIC_PRIMID) {
2434             /* Primitive ID */
2435             operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
2436             dim = VGPU10_OPERAND_INDEX_0D;
2437             numComp = VGPU10_OPERAND_0_COMPONENT;
2438             selMode = 0;
2439 
2440             /* also save the register index so we can check for
2441              * primitive id when emit src register. We need to modify the
2442              * operand type, index dimension when emit primitive id src reg.
2443              */
2444             emit->gs.prim_id_index = i;
2445          }
2446          else if (semantic_name == TGSI_SEMANTIC_POSITION) {
2447             /* vertex position input */
2448             opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
2449             name = VGPU10_NAME_POSITION;
2450          }
2451 
2452          emit_input_declaration(emit, opcodeType, operandType,
2453                                 dim, index,
2454                                 emit->gs.input_size,
2455                                 name,
2456                                 numComp, selMode,
2457                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2458                                 VGPU10_INTERPOLATION_UNDEFINED);
2459       }
2460    }
2461    else {
2462       assert(emit->unit == PIPE_SHADER_VERTEX);
2463 
2464       for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
2465          unsigned usage_mask = emit->info.input_usage_mask[i];
2466          unsigned index = i;
2467 
2468          if (usage_mask == 0)
2469             continue;  /* register is not actually used */
2470 
2471          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
2472                                 VGPU10_OPERAND_TYPE_INPUT,
2473                                 VGPU10_OPERAND_INDEX_1D, index, 1,
2474                                 VGPU10_NAME_UNDEFINED,
2475                                 VGPU10_OPERAND_4_COMPONENT,
2476                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2477                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2478                                 VGPU10_INTERPOLATION_UNDEFINED);
2479       }
2480    }
2481 
2482    return TRUE;
2483 }
2484 
2485 
2486 /**
2487  * Emit all output declarations.
2488  */
2489 static boolean
emit_output_declarations(struct svga_shader_emitter_v10 * emit)2490 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
2491 {
2492    unsigned i;
2493 
2494    for (i = 0; i < emit->info.num_outputs; i++) {
2495       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
2496       const enum tgsi_semantic semantic_name =
2497          emit->info.output_semantic_name[i];
2498       const unsigned semantic_index = emit->info.output_semantic_index[i];
2499       unsigned index = i;
2500 
2501       if (emit->unit == PIPE_SHADER_FRAGMENT) {
2502          if (semantic_name == TGSI_SEMANTIC_COLOR) {
2503             assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
2504 
2505             emit->fs.color_out_index[semantic_index] = index;
2506 
2507             emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
2508                                               index + 1);
2509 
2510             /* The semantic index is the shader's color output/buffer index */
2511             emit_output_declaration(emit,
2512                                     VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
2513                                     VGPU10_NAME_UNDEFINED,
2514                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2515 
2516             if (semantic_index == 0) {
2517                if (emit->key.fs.write_color0_to_n_cbufs > 1) {
2518                   /* Emit declarations for the additional color outputs
2519                    * for broadcasting.
2520                    */
2521                   unsigned j;
2522                   for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
2523                      /* Allocate a new output index */
2524                      unsigned idx = emit->info.num_outputs + j - 1;
2525                      emit->fs.color_out_index[j] = idx;
2526                      emit_output_declaration(emit,
2527                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
2528                                         VGPU10_NAME_UNDEFINED,
2529                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2530                      emit->info.output_semantic_index[idx] = j;
2531                   }
2532 
2533                   emit->fs.num_color_outputs =
2534                      emit->key.fs.write_color0_to_n_cbufs;
2535                }
2536             }
2537             else {
2538                assert(!emit->key.fs.write_color0_to_n_cbufs);
2539             }
2540          }
2541          else if (semantic_name == TGSI_SEMANTIC_POSITION) {
2542             /* Fragment depth output */
2543             emit_fragdepth_output_declaration(emit);
2544          }
2545          else {
2546             assert(!"Bad output semantic name");
2547          }
2548       }
2549       else {
2550          /* VS or GS */
2551          unsigned name, type;
2552          unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
2553 
2554          switch (semantic_name) {
2555          case TGSI_SEMANTIC_POSITION:
2556             assert(emit->unit != PIPE_SHADER_FRAGMENT);
2557             type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
2558             name = VGPU10_NAME_POSITION;
2559             /* Save the index of the vertex position output register */
2560             emit->vposition.out_index = index;
2561             break;
2562          case TGSI_SEMANTIC_CLIPDIST:
2563             type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
2564             name = VGPU10_NAME_CLIP_DISTANCE;
2565             /* save the starting index of the clip distance output register */
2566             if (semantic_index == 0)
2567                emit->clip_dist_out_index = index;
2568             writemask = emit->output_usage_mask[index];
2569             writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
2570             if (writemask == 0x0) {
2571                continue; /* discard this do-nothing declaration */
2572             }
2573             break;
2574          case TGSI_SEMANTIC_PRIMID:
2575             assert(emit->unit == PIPE_SHADER_GEOMETRY);
2576             type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
2577             name = VGPU10_NAME_PRIMITIVE_ID;
2578             break;
2579          case TGSI_SEMANTIC_LAYER:
2580             assert(emit->unit == PIPE_SHADER_GEOMETRY);
2581             type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
2582             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
2583             break;
2584          case TGSI_SEMANTIC_CLIPVERTEX:
2585             type = VGPU10_OPCODE_DCL_OUTPUT;
2586             name = VGPU10_NAME_UNDEFINED;
2587             emit->clip_vertex_out_index = index;
2588             break;
2589          default:
2590             /* generic output */
2591             type = VGPU10_OPCODE_DCL_OUTPUT;
2592             name = VGPU10_NAME_UNDEFINED;
2593          }
2594 
2595          emit_output_declaration(emit, type, index, name, writemask);
2596       }
2597    }
2598 
2599    if (emit->vposition.so_index != INVALID_INDEX &&
2600        emit->vposition.out_index != INVALID_INDEX) {
2601 
2602       assert(emit->unit != PIPE_SHADER_FRAGMENT);
2603 
2604       /* Emit the declaration for the non-adjusted vertex position
2605        * for stream output purpose
2606        */
2607       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2608                               emit->vposition.so_index,
2609                               VGPU10_NAME_UNDEFINED,
2610                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2611    }
2612 
2613    if (emit->clip_dist_so_index != INVALID_INDEX &&
2614        emit->clip_dist_out_index != INVALID_INDEX) {
2615 
2616       assert(emit->unit != PIPE_SHADER_FRAGMENT);
2617 
2618       /* Emit the declaration for the clip distance shadow copy which
2619        * will be used for stream output purpose and for clip distance
2620        * varying variable
2621        */
2622       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2623                               emit->clip_dist_so_index,
2624                               VGPU10_NAME_UNDEFINED,
2625                               emit->output_usage_mask[emit->clip_dist_out_index]);
2626 
2627       if (emit->info.num_written_clipdistance > 4) {
2628          /* for the second clip distance register, each handles 4 planes */
2629          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2630                                  emit->clip_dist_so_index + 1,
2631                                  VGPU10_NAME_UNDEFINED,
2632                                  emit->output_usage_mask[emit->clip_dist_out_index+1]);
2633       }
2634    }
2635 
2636    return TRUE;
2637 }
2638 
2639 
2640 /**
2641  * Emit the declaration for the temporary registers.
2642  */
2643 static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 * emit)2644 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
2645 {
2646    unsigned total_temps, reg, i;
2647 
2648    total_temps = emit->num_shader_temps;
2649 
2650    /* If there is indirect access to non-indexable temps in the shader,
2651     * convert those temps to indexable temps. This works around a bug
2652     * in the GLSL->TGSI translator exposed in piglit test
2653     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
2654     * Internal temps added by the driver remain as non-indexable temps.
2655     */
2656    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
2657        emit->num_temp_arrays == 0) {
2658       unsigned arrayID;
2659 
2660       arrayID = 1;
2661       emit->num_temp_arrays = arrayID + 1;
2662       emit->temp_arrays[arrayID].start = 0;
2663       emit->temp_arrays[arrayID].size = total_temps;
2664 
2665       /* Fill in the temp_map entries for this temp array */
2666       for (i = 0; i < total_temps; i++) {
2667          emit->temp_map[i].arrayId = arrayID;
2668          emit->temp_map[i].index = i;
2669       }
2670    }
2671 
2672    /* Allocate extra temps for specially-implemented instructions,
2673     * such as LIT.
2674     */
2675    total_temps += MAX_INTERNAL_TEMPS;
2676 
2677    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
2678       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
2679           emit->key.clip_plane_enable ||
2680           emit->vposition.so_index != INVALID_INDEX) {
2681          emit->vposition.tmp_index = total_temps;
2682          total_temps += 1;
2683       }
2684 
2685       if (emit->unit == PIPE_SHADER_VERTEX) {
2686          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
2687                                  emit->key.vs.adjust_attrib_itof |
2688                                  emit->key.vs.adjust_attrib_utof |
2689                                  emit->key.vs.attrib_is_bgra |
2690                                  emit->key.vs.attrib_puint_to_snorm |
2691                                  emit->key.vs.attrib_puint_to_uscaled |
2692                                  emit->key.vs.attrib_puint_to_sscaled);
2693          while (attrib_mask) {
2694             unsigned index = u_bit_scan(&attrib_mask);
2695             emit->vs.adjusted_input[index] = total_temps++;
2696          }
2697       }
2698 
2699       if (emit->clip_mode == CLIP_DISTANCE) {
2700          /* We need to write the clip distance to a temporary register
2701           * first. Then it will be copied to the shadow copy for
2702           * the clip distance varying variable and stream output purpose.
2703           * It will also be copied to the actual CLIPDIST register
2704           * according to the enabled clip planes
2705           */
2706          emit->clip_dist_tmp_index = total_temps++;
2707          if (emit->info.num_written_clipdistance > 4)
2708             total_temps++; /* second clip register */
2709       }
2710       else if (emit->clip_mode == CLIP_VERTEX) {
2711          /* We need to convert the TGSI CLIPVERTEX output to one or more
2712           * clip distances.  Allocate a temp reg for the clipvertex here.
2713           */
2714          assert(emit->info.writes_clipvertex > 0);
2715          emit->clip_vertex_tmp_index = total_temps;
2716          total_temps++;
2717       }
2718    }
2719    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
2720       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
2721           emit->key.fs.write_color0_to_n_cbufs > 1) {
2722          /* Allocate a temp to hold the output color */
2723          emit->fs.color_tmp_index = total_temps;
2724          total_temps += 1;
2725       }
2726 
2727       if (emit->fs.face_input_index != INVALID_INDEX) {
2728          /* Allocate a temp for the +/-1 face register */
2729          emit->fs.face_tmp_index = total_temps;
2730          total_temps += 1;
2731       }
2732 
2733       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
2734          /* Allocate a temp for modified fragment position register */
2735          emit->fs.fragcoord_tmp_index = total_temps;
2736          total_temps += 1;
2737       }
2738    }
2739 
2740    for (i = 0; i < emit->num_address_regs; i++) {
2741       emit->address_reg_index[i] = total_temps++;
2742    }
2743 
2744    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
2745     * temp indexes.  Basically, we compact all the non-array temp register
2746     * indexes into a consecutive series.
2747     *
2748     * Before, we may have some TGSI declarations like:
2749     *   DCL TEMP[0..1], LOCAL
2750     *   DCL TEMP[2..4], ARRAY(1), LOCAL
2751     *   DCL TEMP[5..7], ARRAY(2), LOCAL
2752     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
2753     *
2754     * After, we'll have a map like this:
2755     *   temp_map[0] = { array 0, index 0 }
2756     *   temp_map[1] = { array 0, index 1 }
2757     *   temp_map[2] = { array 1, index 0 }
2758     *   temp_map[3] = { array 1, index 1 }
2759     *   temp_map[4] = { array 1, index 2 }
2760     *   temp_map[5] = { array 2, index 0 }
2761     *   temp_map[6] = { array 2, index 1 }
2762     *   temp_map[7] = { array 2, index 2 }
2763     *   temp_map[8] = { array 0, index 2 }
2764     *   temp_map[9] = { array 0, index 3 }
2765     *
2766     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
2767     * temps numbered 0..3
2768     *
2769     * Any time we emit a temporary register index, we'll have to use the
2770     * temp_map[] table to convert the TGSI index to the VGPU10 index.
2771     *
2772     * Finally, we recompute the total_temps value here.
2773     */
2774    reg = 0;
2775    for (i = 0; i < total_temps; i++) {
2776       if (emit->temp_map[i].arrayId == 0) {
2777          emit->temp_map[i].index = reg++;
2778       }
2779    }
2780 
2781    if (0) {
2782       debug_printf("total_temps %u\n", total_temps);
2783       for (i = 0; i < total_temps; i++) {
2784          debug_printf("temp %u ->  array %u  index %u\n",
2785                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
2786       }
2787    }
2788 
2789    total_temps = reg;
2790 
2791    /* Emit declaration of ordinary temp registers */
2792    if (total_temps > 0) {
2793       VGPU10OpcodeToken0 opcode0;
2794 
2795       opcode0.value = 0;
2796       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
2797 
2798       begin_emit_instruction(emit);
2799       emit_dword(emit, opcode0.value);
2800       emit_dword(emit, total_temps);
2801       end_emit_instruction(emit);
2802    }
2803 
2804    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
2805     * it's unused.
2806     */
2807    for (i = 1; i < emit->num_temp_arrays; i++) {
2808       unsigned num_temps = emit->temp_arrays[i].size;
2809 
2810       if (num_temps > 0) {
2811          VGPU10OpcodeToken0 opcode0;
2812 
2813          opcode0.value = 0;
2814          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
2815 
2816          begin_emit_instruction(emit);
2817          emit_dword(emit, opcode0.value);
2818          emit_dword(emit, i); /* which array */
2819          emit_dword(emit, num_temps);
2820          emit_dword(emit, 4); /* num components */
2821          end_emit_instruction(emit);
2822 
2823          total_temps += num_temps;
2824       }
2825    }
2826 
2827    /* Check that the grand total of all regular and indexed temps is
2828     * under the limit.
2829     */
2830    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
2831 
2832    return TRUE;
2833 }
2834 
2835 
2836 static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 * emit)2837 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
2838 {
2839    VGPU10OpcodeToken0 opcode0;
2840    VGPU10OperandToken0 operand0;
2841    unsigned total_consts, i;
2842 
2843    opcode0.value = 0;
2844    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
2845    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
2846    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
2847 
2848    operand0.value = 0;
2849    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2850    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
2851    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2852    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2853    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
2854    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2855    operand0.swizzleX = 0;
2856    operand0.swizzleY = 1;
2857    operand0.swizzleZ = 2;
2858    operand0.swizzleW = 3;
2859 
2860    /**
2861     * Emit declaration for constant buffer [0].  We also allocate
2862     * room for the extra constants here.
2863     */
2864    total_consts = emit->num_shader_consts[0];
2865 
2866    /* Now, allocate constant slots for the "extra" constants.
2867     * Note: it's critical that these extra constant locations
2868     * exactly match what's emitted by the "extra" constants code
2869     * in svga_state_constants.c
2870     */
2871 
2872    /* Vertex position scale/translation */
2873    if (emit->vposition.need_prescale) {
2874       emit->vposition.prescale_scale_index = total_consts++;
2875       emit->vposition.prescale_trans_index = total_consts++;
2876    }
2877 
2878    if (emit->unit == PIPE_SHADER_VERTEX) {
2879       if (emit->key.vs.undo_viewport) {
2880          emit->vs.viewport_index = total_consts++;
2881       }
2882    }
2883 
2884    /* user-defined clip planes */
2885    if (emit->key.clip_plane_enable) {
2886       unsigned n = util_bitcount(emit->key.clip_plane_enable);
2887       assert(emit->unit == PIPE_SHADER_VERTEX ||
2888              emit->unit == PIPE_SHADER_GEOMETRY);
2889       for (i = 0; i < n; i++) {
2890          emit->clip_plane_const[i] = total_consts++;
2891       }
2892    }
2893 
2894    for (i = 0; i < emit->num_samplers; i++) {
2895 
2896       if (emit->sampler_view[i]) {
2897 
2898          /* Texcoord scale factors for RECT textures */
2899          if (emit->key.tex[i].unnormalized) {
2900             emit->texcoord_scale_index[i] = total_consts++;
2901          }
2902 
2903          /* Texture buffer sizes */
2904          if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
2905             emit->texture_buffer_size_index[i] = total_consts++;
2906          }
2907       }
2908    }
2909 
2910    if (total_consts > 0) {
2911       begin_emit_instruction(emit);
2912       emit_dword(emit, opcode0.value);
2913       emit_dword(emit, operand0.value);
2914       emit_dword(emit, 0);  /* which const buffer slot */
2915       emit_dword(emit, total_consts);
2916       end_emit_instruction(emit);
2917    }
2918 
2919    /* Declare remaining constant buffers (UBOs) */
2920    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
2921       if (emit->num_shader_consts[i] > 0) {
2922          begin_emit_instruction(emit);
2923          emit_dword(emit, opcode0.value);
2924          emit_dword(emit, operand0.value);
2925          emit_dword(emit, i);  /* which const buffer slot */
2926          emit_dword(emit, emit->num_shader_consts[i]);
2927          end_emit_instruction(emit);
2928       }
2929    }
2930 
2931    return TRUE;
2932 }
2933 
2934 
2935 /**
2936  * Emit declarations for samplers.
2937  */
2938 static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)2939 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
2940 {
2941    unsigned i;
2942 
2943    for (i = 0; i < emit->num_samplers; i++) {
2944       VGPU10OpcodeToken0 opcode0;
2945       VGPU10OperandToken0 operand0;
2946 
2947       opcode0.value = 0;
2948       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
2949       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
2950 
2951       operand0.value = 0;
2952       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2953       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2954       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2955       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2956 
2957       begin_emit_instruction(emit);
2958       emit_dword(emit, opcode0.value);
2959       emit_dword(emit, operand0.value);
2960       emit_dword(emit, i);
2961       end_emit_instruction(emit);
2962    }
2963 
2964    return TRUE;
2965 }
2966 
2967 
2968 /**
2969  * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
2970  */
2971 static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,boolean is_array)2972 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
2973                                    boolean is_array)
2974 {
2975    switch (target) {
2976    case TGSI_TEXTURE_BUFFER:
2977       return VGPU10_RESOURCE_DIMENSION_BUFFER;
2978    case TGSI_TEXTURE_1D:
2979       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
2980    case TGSI_TEXTURE_2D:
2981    case TGSI_TEXTURE_RECT:
2982       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2983    case TGSI_TEXTURE_3D:
2984       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
2985    case TGSI_TEXTURE_CUBE:
2986       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
2987    case TGSI_TEXTURE_SHADOW1D:
2988       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
2989    case TGSI_TEXTURE_SHADOW2D:
2990    case TGSI_TEXTURE_SHADOWRECT:
2991       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2992    case TGSI_TEXTURE_1D_ARRAY:
2993    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2994       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
2995          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
2996    case TGSI_TEXTURE_2D_ARRAY:
2997    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2998       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
2999          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
3000    case TGSI_TEXTURE_SHADOWCUBE:
3001       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
3002    case TGSI_TEXTURE_2D_MSAA:
3003       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
3004    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3005       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
3006          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
3007    case TGSI_TEXTURE_CUBE_ARRAY:
3008       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
3009    default:
3010       assert(!"Unexpected resource type");
3011       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
3012    }
3013 }
3014 
3015 
3016 /**
3017  * Given a tgsi_return_type, return true iff it is an integer type.
3018  */
3019 static boolean
is_integer_type(enum tgsi_return_type type)3020 is_integer_type(enum tgsi_return_type type)
3021 {
3022    switch (type) {
3023       case TGSI_RETURN_TYPE_SINT:
3024       case TGSI_RETURN_TYPE_UINT:
3025          return TRUE;
3026       case TGSI_RETURN_TYPE_FLOAT:
3027       case TGSI_RETURN_TYPE_UNORM:
3028       case TGSI_RETURN_TYPE_SNORM:
3029          return FALSE;
3030       case TGSI_RETURN_TYPE_COUNT:
3031       default:
3032          assert(!"is_integer_type: Unknown tgsi_return_type");
3033          return FALSE;
3034    }
3035 }
3036 
3037 
3038 /**
3039  * Emit declarations for resources.
3040  * XXX When we're sure that all TGSI shaders will be generated with
3041  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
3042  * rework this code.
3043  */
3044 static boolean
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)3045 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
3046 {
3047    unsigned i;
3048 
3049    /* Emit resource decl for each sampler */
3050    for (i = 0; i < emit->num_samplers; i++) {
3051       VGPU10OpcodeToken0 opcode0;
3052       VGPU10OperandToken0 operand0;
3053       VGPU10ResourceReturnTypeToken return_type;
3054       VGPU10_RESOURCE_RETURN_TYPE rt;
3055 
3056       opcode0.value = 0;
3057       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
3058       opcode0.resourceDimension =
3059          tgsi_texture_to_resource_dimension(emit->sampler_target[i],
3060                                             emit->key.tex[i].is_array);
3061       operand0.value = 0;
3062       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3063       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
3064       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3065       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3066 
3067 #if 1
3068       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
3069       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
3070       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
3071       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
3072       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
3073       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
3074       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
3075       rt = emit->sampler_return_type[i] + 1;
3076 #else
3077       switch (emit->sampler_return_type[i]) {
3078          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
3079          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
3080          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
3081          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
3082          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
3083          case TGSI_RETURN_TYPE_COUNT:
3084          default:
3085             rt = VGPU10_RETURN_TYPE_FLOAT;
3086             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
3087       }
3088 #endif
3089 
3090       return_type.value = 0;
3091       return_type.component0 = rt;
3092       return_type.component1 = rt;
3093       return_type.component2 = rt;
3094       return_type.component3 = rt;
3095 
3096       begin_emit_instruction(emit);
3097       emit_dword(emit, opcode0.value);
3098       emit_dword(emit, operand0.value);
3099       emit_dword(emit, i);
3100       emit_dword(emit, return_type.value);
3101       end_emit_instruction(emit);
3102    }
3103 
3104    return TRUE;
3105 }
3106 
3107 static void
emit_instruction_op1(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src,boolean saturate)3108 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
3109                      unsigned opcode,
3110                      const struct tgsi_full_dst_register *dst,
3111                      const struct tgsi_full_src_register *src,
3112                      boolean saturate)
3113 {
3114    begin_emit_instruction(emit);
3115    emit_opcode(emit, opcode, saturate);
3116    emit_dst_register(emit, dst);
3117    emit_src_register(emit, src);
3118    end_emit_instruction(emit);
3119 }
3120 
3121 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,boolean saturate)3122 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
3123                      unsigned opcode,
3124                      const struct tgsi_full_dst_register *dst,
3125                      const struct tgsi_full_src_register *src1,
3126                      const struct tgsi_full_src_register *src2,
3127                      boolean saturate)
3128 {
3129    begin_emit_instruction(emit);
3130    emit_opcode(emit, opcode, saturate);
3131    emit_dst_register(emit, dst);
3132    emit_src_register(emit, src1);
3133    emit_src_register(emit, src2);
3134    end_emit_instruction(emit);
3135 }
3136 
3137 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3,boolean saturate)3138 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
3139                      unsigned opcode,
3140                      const struct tgsi_full_dst_register *dst,
3141                      const struct tgsi_full_src_register *src1,
3142                      const struct tgsi_full_src_register *src2,
3143                      const struct tgsi_full_src_register *src3,
3144                      boolean saturate)
3145 {
3146    begin_emit_instruction(emit);
3147    emit_opcode(emit, opcode, saturate);
3148    emit_dst_register(emit, dst);
3149    emit_src_register(emit, src1);
3150    emit_src_register(emit, src2);
3151    emit_src_register(emit, src3);
3152    end_emit_instruction(emit);
3153 }
3154 
3155 /**
3156  * Emit the actual clip distance instructions to be used for clipping
3157  * by copying the clip distance from the temporary registers to the
3158  * CLIPDIST registers written with the enabled planes mask.
3159  * Also copy the clip distance from the temporary to the clip distance
3160  * shadow copy register which will be referenced by the input shader
3161  */
3162 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)3163 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
3164 {
3165    struct tgsi_full_src_register tmp_clip_dist_src;
3166    struct tgsi_full_dst_register clip_dist_dst;
3167 
3168    unsigned i;
3169    unsigned clip_plane_enable = emit->key.clip_plane_enable;
3170    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
3171    int num_written_clipdist = emit->info.num_written_clipdistance;
3172 
3173    assert(emit->clip_dist_out_index != INVALID_INDEX);
3174    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
3175 
3176    /**
3177     * Temporary reset the temporary clip dist register index so
3178     * that the copy to the real clip dist register will not
3179     * attempt to copy to the temporary register again
3180     */
3181    emit->clip_dist_tmp_index = INVALID_INDEX;
3182 
3183    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
3184 
3185       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
3186 
3187       /**
3188        * copy to the shadow copy for use by varying variable and
3189        * stream output. All clip distances
3190        * will be written regardless of the enabled clipping planes.
3191        */
3192       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
3193                                    emit->clip_dist_so_index + i);
3194 
3195       /* MOV clip_dist_so, tmp_clip_dist */
3196       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
3197                            &tmp_clip_dist_src, FALSE);
3198 
3199       /**
3200        * copy those clip distances to enabled clipping planes
3201        * to CLIPDIST registers for clipping
3202        */
3203       if (clip_plane_enable & 0xf) {
3204          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
3205                                       emit->clip_dist_out_index + i);
3206          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
3207 
3208          /* MOV CLIPDIST, tmp_clip_dist */
3209          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
3210                               &tmp_clip_dist_src, FALSE);
3211       }
3212       /* four clip planes per clip register */
3213       clip_plane_enable >>= 4;
3214    }
3215    /**
3216     * set the temporary clip dist register index back to the
3217     * temporary index for the next vertex
3218     */
3219    emit->clip_dist_tmp_index = clip_dist_tmp_index;
3220 }
3221 
3222 /* Declare clip distance output registers for user-defined clip planes
3223  * or the TGSI_CLIPVERTEX output.
3224  */
3225 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)3226 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
3227 {
3228    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
3229    unsigned index = emit->num_outputs;
3230    unsigned plane_mask;
3231 
3232    assert(emit->unit == PIPE_SHADER_VERTEX ||
3233           emit->unit == PIPE_SHADER_GEOMETRY);
3234    assert(num_clip_planes <= 8);
3235 
3236    if (emit->clip_mode != CLIP_LEGACY &&
3237        emit->clip_mode != CLIP_VERTEX) {
3238       return;
3239    }
3240 
3241    if (num_clip_planes == 0)
3242       return;
3243 
3244    /* Declare one or two clip output registers.  The number of components
3245     * in the mask reflects the number of clip planes.  For example, if 5
3246     * clip planes are needed, we'll declare outputs similar to:
3247     * dcl_output_siv o2.xyzw, clip_distance
3248     * dcl_output_siv o3.x, clip_distance
3249     */
3250    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
3251 
3252    plane_mask = (1 << num_clip_planes) - 1;
3253    if (plane_mask & 0xf) {
3254       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3255       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
3256                               VGPU10_NAME_CLIP_DISTANCE, cmask);
3257       emit->num_outputs++;
3258    }
3259    if (plane_mask & 0xf0) {
3260       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3261       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
3262                               VGPU10_NAME_CLIP_DISTANCE, cmask);
3263       emit->num_outputs++;
3264    }
3265 }
3266 
3267 
3268 /**
3269  * Emit the instructions for writing to the clip distance registers
3270  * to handle legacy/automatic clip planes.
3271  * For each clip plane, the distance is the dot product of the vertex
3272  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
3273  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
3274  * output registers already declared.
3275  */
3276 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)3277 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
3278                              unsigned vpos_tmp_index)
3279 {
3280    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
3281 
3282    assert(emit->clip_mode == CLIP_LEGACY);
3283    assert(num_clip_planes <= 8);
3284 
3285    assert(emit->unit == PIPE_SHADER_VERTEX ||
3286           emit->unit == PIPE_SHADER_GEOMETRY);
3287 
3288    for (i = 0; i < num_clip_planes; i++) {
3289       struct tgsi_full_dst_register dst;
3290       struct tgsi_full_src_register plane_src, vpos_src;
3291       unsigned reg_index = emit->clip_dist_out_index + i / 4;
3292       unsigned comp = i % 4;
3293       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
3294 
3295       /* create dst, src regs */
3296       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
3297       dst = writemask_dst(&dst, writemask);
3298 
3299       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
3300       vpos_src = make_src_temp_reg(vpos_tmp_index);
3301 
3302       /* DP4 clip_dist, plane, vpos */
3303       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
3304                            &plane_src, &vpos_src, FALSE);
3305    }
3306 }
3307 
3308 
3309 /**
3310  * Emit the instructions for computing the clip distance results from
3311  * the clip vertex temporary.
3312  * For each clip plane, the distance is the dot product of the clip vertex
3313  * position (found in a temp reg) and the clip plane coefficients.
3314  */
3315 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)3316 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
3317 {
3318    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
3319    unsigned i;
3320    struct tgsi_full_dst_register dst;
3321    struct tgsi_full_src_register clipvert_src;
3322    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
3323 
3324    assert(emit->unit == PIPE_SHADER_VERTEX ||
3325           emit->unit == PIPE_SHADER_GEOMETRY);
3326 
3327    assert(emit->clip_mode == CLIP_VERTEX);
3328 
3329    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
3330 
3331    for (i = 0; i < num_clip; i++) {
3332       struct tgsi_full_src_register plane_src;
3333       unsigned reg_index = emit->clip_dist_out_index + i / 4;
3334       unsigned comp = i % 4;
3335       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
3336 
3337       /* create dst, src regs */
3338       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
3339       dst = writemask_dst(&dst, writemask);
3340 
3341       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
3342 
3343       /* DP4 clip_dist, plane, vpos */
3344       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
3345                            &plane_src, &clipvert_src, FALSE);
3346    }
3347 
3348    /* copy temporary clip vertex register to the clip vertex register */
3349 
3350    assert(emit->clip_vertex_out_index != INVALID_INDEX);
3351 
3352    /**
3353     * temporary reset the temporary clip vertex register index so
3354     * that copy to the clip vertex register will not attempt
3355     * to copy to the temporary register again
3356     */
3357    emit->clip_vertex_tmp_index = INVALID_INDEX;
3358 
3359    /* MOV clip_vertex, clip_vertex_tmp */
3360    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
3361    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
3362                         &dst, &clipvert_src, FALSE);
3363 
3364    /**
3365     * set the temporary clip vertex register index back to the
3366     * temporary index for the next vertex
3367     */
3368    emit->clip_vertex_tmp_index = clip_vertex_tmp;
3369 }
3370 
3371 /**
3372  * Emit code to convert RGBA to BGRA
3373  */
3374 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)3375 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
3376                      const struct tgsi_full_dst_register *dst,
3377                      const struct tgsi_full_src_register *src)
3378 {
3379    struct tgsi_full_src_register bgra_src =
3380       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
3381 
3382    begin_emit_instruction(emit);
3383    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
3384    emit_dst_register(emit, dst);
3385    emit_src_register(emit, &bgra_src);
3386    end_emit_instruction(emit);
3387 }
3388 
3389 
3390 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
3391 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)3392 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
3393                     const struct tgsi_full_dst_register *dst,
3394                     const struct tgsi_full_src_register *src)
3395 {
3396    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
3397    struct tgsi_full_src_register two =
3398       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
3399    struct tgsi_full_src_register neg_two =
3400       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
3401 
3402    unsigned val_tmp = get_temp_index(emit);
3403    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
3404    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
3405 
3406    unsigned bias_tmp = get_temp_index(emit);
3407    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
3408    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
3409 
3410    /* val = src * 2.0 */
3411    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
3412                         src, &two, FALSE);
3413 
3414    /* bias = src > 0.5 */
3415    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
3416                         src, &half, FALSE);
3417 
3418    /* bias = bias & -2.0 */
3419    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
3420                         &bias_src, &neg_two, FALSE);
3421 
3422    /* dst = val + bias */
3423    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
3424                         &val_src, &bias_src, FALSE);
3425 
3426    free_temp_indexes(emit);
3427 }
3428 
3429 
3430 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
3431 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)3432 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
3433                       const struct tgsi_full_dst_register *dst,
3434                       const struct tgsi_full_src_register *src)
3435 {
3436    struct tgsi_full_src_register scale =
3437       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
3438 
3439    /* dst = src * scale */
3440    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
3441 }
3442 
3443 
3444 /** Convert from R32_UINT to 10_10_10_2_sscaled */
3445 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)3446 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
3447                       const struct tgsi_full_dst_register *dst,
3448                       const struct tgsi_full_src_register *src)
3449 {
3450    struct tgsi_full_src_register lshift =
3451       make_immediate_reg_int4(emit, 22, 12, 2, 0);
3452    struct tgsi_full_src_register rshift =
3453       make_immediate_reg_int4(emit, 22, 22, 22, 30);
3454 
3455    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
3456 
3457    unsigned tmp = get_temp_index(emit);
3458    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3459    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3460 
3461    /*
3462     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
3463     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
3464     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
3465     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
3466     * dst = i_to_f(r,g,b,a);     # convert to float
3467     */
3468    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
3469                         &src_xxxx, &lshift, FALSE);
3470    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
3471                         &tmp_src, &rshift, FALSE);
3472    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
3473 
3474    free_temp_indexes(emit);
3475 }
3476 
3477 
3478 /**
3479  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
3480  */
3481 static boolean
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3482 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
3483               const struct tgsi_full_instruction *inst)
3484 {
3485    unsigned index = inst->Dst[0].Register.Index;
3486    struct tgsi_full_dst_register dst;
3487    unsigned opcode;
3488 
3489    assert(index < MAX_VGPU10_ADDR_REGS);
3490    dst = make_dst_temp_reg(emit->address_reg_index[index]);
3491 
3492    /* ARL dst, s0
3493     * Translates into:
3494     * FTOI address_tmp, s0
3495     *
3496     * UARL dst, s0
3497     * Translates into:
3498     * MOV address_tmp, s0
3499     */
3500    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
3501       opcode = VGPU10_OPCODE_FTOI;
3502    else
3503       opcode = VGPU10_OPCODE_MOV;
3504 
3505    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
3506 
3507    return TRUE;
3508 }
3509 
3510 
3511 /**
3512  * Emit code for TGSI_OPCODE_CAL instruction.
3513  */
3514 static boolean
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3515 emit_cal(struct svga_shader_emitter_v10 *emit,
3516          const struct tgsi_full_instruction *inst)
3517 {
3518    unsigned label = inst->Label.Label;
3519    VGPU10OperandToken0 operand;
3520    operand.value = 0;
3521    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
3522 
3523    begin_emit_instruction(emit);
3524    emit_dword(emit, operand.value);
3525    emit_dword(emit, label);
3526    end_emit_instruction(emit);
3527 
3528    return TRUE;
3529 }
3530 
3531 
3532 /**
3533  * Emit code for TGSI_OPCODE_IABS instruction.
3534  */
3535 static boolean
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3536 emit_iabs(struct svga_shader_emitter_v10 *emit,
3537           const struct tgsi_full_instruction *inst)
3538 {
3539    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
3540     * dst.y = (src0.y < 0) ? -src0.y : src0.y
3541     * dst.z = (src0.z < 0) ? -src0.z : src0.z
3542     * dst.w = (src0.w < 0) ? -src0.w : src0.w
3543     *
3544     * Translates into
3545     *   IMAX dst, src, neg(src)
3546     */
3547    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
3548    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
3549                         &inst->Src[0], &neg_src, FALSE);
3550 
3551    return TRUE;
3552 }
3553 
3554 
3555 /**
3556  * Emit code for TGSI_OPCODE_CMP instruction.
3557  */
3558 static boolean
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3559 emit_cmp(struct svga_shader_emitter_v10 *emit,
3560          const struct tgsi_full_instruction *inst)
3561 {
3562    /* dst.x = (src0.x < 0) ? src1.x : src2.x
3563     * dst.y = (src0.y < 0) ? src1.y : src2.y
3564     * dst.z = (src0.z < 0) ? src1.z : src2.z
3565     * dst.w = (src0.w < 0) ? src1.w : src2.w
3566     *
3567     * Translates into
3568     *   LT tmp, src0, 0.0
3569     *   MOVC dst, tmp, src1, src2
3570     */
3571    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3572    unsigned tmp = get_temp_index(emit);
3573    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3574    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3575 
3576    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
3577                         &inst->Src[0], &zero, FALSE);
3578    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
3579                         &tmp_src, &inst->Src[1], &inst->Src[2],
3580                         inst->Instruction.Saturate);
3581 
3582    free_temp_indexes(emit);
3583 
3584    return TRUE;
3585 }
3586 
3587 
3588 /**
3589  * Emit code for TGSI_OPCODE_DST instruction.
3590  */
3591 static boolean
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3592 emit_dst(struct svga_shader_emitter_v10 *emit,
3593          const struct tgsi_full_instruction *inst)
3594 {
3595    /*
3596     * dst.x = 1
3597     * dst.y = src0.y * src1.y
3598     * dst.z = src0.z
3599     * dst.w = src1.w
3600     */
3601 
3602    struct tgsi_full_src_register s0_yyyy =
3603       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
3604    struct tgsi_full_src_register s0_zzzz =
3605       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
3606    struct tgsi_full_src_register s1_yyyy =
3607       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
3608    struct tgsi_full_src_register s1_wwww =
3609       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
3610 
3611    /*
3612     * If dst and either src0 and src1 are the same we need
3613     * to create a temporary for it and insert a extra move.
3614     */
3615    unsigned tmp_move = get_temp_index(emit);
3616    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3617    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3618 
3619    /* MOV dst.x, 1.0 */
3620    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3621       struct tgsi_full_dst_register dst_x =
3622          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3623       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3624 
3625       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
3626    }
3627 
3628    /* MUL dst.y, s0.y, s1.y */
3629    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3630       struct tgsi_full_dst_register dst_y =
3631          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3632 
3633       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
3634                            &s1_yyyy, inst->Instruction.Saturate);
3635    }
3636 
3637    /* MOV dst.z, s0.z */
3638    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3639       struct tgsi_full_dst_register dst_z =
3640          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3641 
3642       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
3643                            inst->Instruction.Saturate);
3644   }
3645 
3646    /* MOV dst.w, s1.w */
3647    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3648       struct tgsi_full_dst_register dst_w =
3649          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3650 
3651       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
3652                            inst->Instruction.Saturate);
3653    }
3654 
3655    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
3656                         FALSE);
3657    free_temp_indexes(emit);
3658 
3659    return TRUE;
3660 }
3661 
3662 
3663 
3664 /**
3665  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
3666  */
3667 static boolean
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3668 emit_endprim(struct svga_shader_emitter_v10 *emit,
3669              const struct tgsi_full_instruction *inst)
3670 {
3671    assert(emit->unit == PIPE_SHADER_GEOMETRY);
3672 
3673    /* We can't use emit_simple() because the TGSI instruction has one
3674     * operand (vertex stream number) which we must ignore for VGPU10.
3675     */
3676    begin_emit_instruction(emit);
3677    emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
3678    end_emit_instruction(emit);
3679    return TRUE;
3680 }
3681 
3682 
3683 /**
3684  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
3685  */
3686 static boolean
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3687 emit_ex2(struct svga_shader_emitter_v10 *emit,
3688          const struct tgsi_full_instruction *inst)
3689 {
3690    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
3691     * while VGPU10 computes four values.
3692     *
3693     * dst = EX2(src):
3694     *   dst.xyzw = 2.0 ^ src.x
3695     */
3696 
3697    struct tgsi_full_src_register src_xxxx =
3698       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3699                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3700 
3701    /* EXP tmp, s0.xxxx */
3702    emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
3703                         inst->Instruction.Saturate);
3704 
3705    return TRUE;
3706 }
3707 
3708 
3709 /**
3710  * Emit code for TGSI_OPCODE_EXP instruction.
3711  */
3712 static boolean
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3713 emit_exp(struct svga_shader_emitter_v10 *emit,
3714          const struct tgsi_full_instruction *inst)
3715 {
3716    /*
3717     * dst.x = 2 ^ floor(s0.x)
3718     * dst.y = s0.x - floor(s0.x)
3719     * dst.z = 2 ^ s0.x
3720     * dst.w = 1.0
3721     */
3722 
3723    struct tgsi_full_src_register src_xxxx =
3724       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
3725    unsigned tmp = get_temp_index(emit);
3726    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3727    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3728 
3729    /*
3730     * If dst and src are the same we need to create
3731     * a temporary for it and insert a extra move.
3732     */
3733    unsigned tmp_move = get_temp_index(emit);
3734    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3735    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3736 
3737    /* only use X component of temp reg */
3738    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3739    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3740 
3741    /* ROUND_NI tmp.x, s0.x */
3742    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
3743                         &src_xxxx, FALSE); /* round to -infinity */
3744 
3745    /* EXP dst.x, tmp.x */
3746    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3747       struct tgsi_full_dst_register dst_x =
3748          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3749 
3750       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
3751                            inst->Instruction.Saturate);
3752    }
3753 
3754    /* ADD dst.y, s0.x, -tmp */
3755    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3756       struct tgsi_full_dst_register dst_y =
3757          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3758       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
3759 
3760       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
3761                            &neg_tmp_src, inst->Instruction.Saturate);
3762    }
3763 
3764    /* EXP dst.z, s0.x */
3765    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3766       struct tgsi_full_dst_register dst_z =
3767          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3768 
3769       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
3770                            inst->Instruction.Saturate);
3771    }
3772 
3773    /* MOV dst.w, 1.0 */
3774    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3775       struct tgsi_full_dst_register dst_w =
3776          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3777       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3778 
3779       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
3780                            FALSE);
3781    }
3782 
3783    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
3784                         FALSE);
3785 
3786    free_temp_indexes(emit);
3787 
3788    return TRUE;
3789 }
3790 
3791 
3792 /**
3793  * Emit code for TGSI_OPCODE_IF instruction.
3794  */
3795 static boolean
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3796 emit_if(struct svga_shader_emitter_v10 *emit,
3797         const struct tgsi_full_instruction *inst)
3798 {
3799    VGPU10OpcodeToken0 opcode0;
3800 
3801    /* The src register should be a scalar */
3802    assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
3803           inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
3804           inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
3805 
3806    /* The only special thing here is that we need to set the
3807     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
3808     * src.x is non-zero.
3809     */
3810    opcode0.value = 0;
3811    opcode0.opcodeType = VGPU10_OPCODE_IF;
3812    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
3813 
3814    begin_emit_instruction(emit);
3815    emit_dword(emit, opcode0.value);
3816    emit_src_register(emit, &inst->Src[0]);
3817    end_emit_instruction(emit);
3818 
3819    return TRUE;
3820 }
3821 
3822 
3823 /**
3824  * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
3825  * the register components are negative).
3826  */
3827 static boolean
emit_kill_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3828 emit_kill_if(struct svga_shader_emitter_v10 *emit,
3829              const struct tgsi_full_instruction *inst)
3830 {
3831    unsigned tmp = get_temp_index(emit);
3832    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3833    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3834 
3835    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3836 
3837    struct tgsi_full_dst_register tmp_dst_x =
3838       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3839    struct tgsi_full_src_register tmp_src_xxxx =
3840       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3841 
3842    /* tmp = src[0] < 0.0 */
3843    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
3844                         &zero, FALSE);
3845 
3846    if (!same_swizzle_terms(&inst->Src[0])) {
3847       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
3848        * logically OR the swizzle terms.  Most uses of KILL_IF only
3849        * test one channel so it's good to avoid these extra steps.
3850        */
3851       struct tgsi_full_src_register tmp_src_yyyy =
3852          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
3853       struct tgsi_full_src_register tmp_src_zzzz =
3854          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
3855       struct tgsi_full_src_register tmp_src_wwww =
3856          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
3857 
3858       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3859                            &tmp_src_yyyy, FALSE);
3860       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3861                            &tmp_src_zzzz, FALSE);
3862       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3863                            &tmp_src_wwww, FALSE);
3864    }
3865 
3866    begin_emit_instruction(emit);
3867    emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
3868    emit_src_register(emit, &tmp_src_xxxx);
3869    end_emit_instruction(emit);
3870 
3871    free_temp_indexes(emit);
3872 
3873    return TRUE;
3874 }
3875 
3876 
3877 /**
3878  * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
3879  */
3880 static boolean
emit_kill(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3881 emit_kill(struct svga_shader_emitter_v10 *emit,
3882           const struct tgsi_full_instruction *inst)
3883 {
3884    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3885 
3886    /* DISCARD if 0.0 is zero */
3887    begin_emit_instruction(emit);
3888    emit_discard_opcode(emit, FALSE);
3889    emit_src_register(emit, &zero);
3890    end_emit_instruction(emit);
3891 
3892    return TRUE;
3893 }
3894 
3895 
3896 /**
3897  * Emit code for TGSI_OPCODE_LG2 instruction.
3898  */
3899 static boolean
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3900 emit_lg2(struct svga_shader_emitter_v10 *emit,
3901          const struct tgsi_full_instruction *inst)
3902 {
3903    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
3904     * while VGPU10 computes four values.
3905     *
3906     * dst = LG2(src):
3907     *   dst.xyzw = log2(src.x)
3908     */
3909 
3910    struct tgsi_full_src_register src_xxxx =
3911       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3912                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3913 
3914    /* LOG tmp, s0.xxxx */
3915    emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
3916                         inst->Instruction.Saturate);
3917 
3918    return TRUE;
3919 }
3920 
3921 
3922 /**
3923  * Emit code for TGSI_OPCODE_LIT instruction.
3924  */
3925 static boolean
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)3926 emit_lit(struct svga_shader_emitter_v10 *emit,
3927          const struct tgsi_full_instruction *inst)
3928 {
3929    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3930 
3931    /*
3932     * If dst and src are the same we need to create
3933     * a temporary for it and insert a extra move.
3934     */
3935    unsigned tmp_move = get_temp_index(emit);
3936    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3937    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3938 
3939    /*
3940     * dst.x = 1
3941     * dst.y = max(src.x, 0)
3942     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
3943     * dst.w = 1
3944     */
3945 
3946    /* MOV dst.x, 1.0 */
3947    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3948       struct tgsi_full_dst_register dst_x =
3949          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3950       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
3951    }
3952 
3953    /* MOV dst.w, 1.0 */
3954    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3955       struct tgsi_full_dst_register dst_w =
3956          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3957       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
3958    }
3959 
3960    /* MAX dst.y, src.x, 0.0 */
3961    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3962       struct tgsi_full_dst_register dst_y =
3963          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3964       struct tgsi_full_src_register zero =
3965          make_immediate_reg_float(emit, 0.0f);
3966       struct tgsi_full_src_register src_xxxx =
3967          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3968                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3969 
3970       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
3971                            &zero, inst->Instruction.Saturate);
3972    }
3973 
3974    /*
3975     * tmp1 = clamp(src.w, -128, 128);
3976     *   MAX tmp1, src.w, -128
3977     *   MIN tmp1, tmp1, 128
3978     *
3979     * tmp2 = max(tmp2, 0);
3980     *   MAX tmp2, src.y, 0
3981     *
3982     * tmp1 = pow(tmp2, tmp1);
3983     *   LOG tmp2, tmp2
3984     *   MUL tmp1, tmp2, tmp1
3985     *   EXP tmp1, tmp1
3986     *
3987     * tmp1 = (src.w == 0) ? 1 : tmp1;
3988     *   EQ tmp2, 0, src.w
3989     *   MOVC tmp1, tmp2, 1.0, tmp1
3990     *
3991     * dst.z = (0 < src.x) ? tmp1 : 0;
3992     *   LT tmp2, 0, src.x
3993     *   MOVC dst.z, tmp2, tmp1, 0.0
3994     */
3995    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3996       struct tgsi_full_dst_register dst_z =
3997          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3998 
3999       unsigned tmp1 = get_temp_index(emit);
4000       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4001       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4002       unsigned tmp2 = get_temp_index(emit);
4003       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4004       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4005 
4006       struct tgsi_full_src_register src_xxxx =
4007          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
4008       struct tgsi_full_src_register src_yyyy =
4009          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
4010       struct tgsi_full_src_register src_wwww =
4011          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
4012 
4013       struct tgsi_full_src_register zero =
4014          make_immediate_reg_float(emit, 0.0f);
4015       struct tgsi_full_src_register lowerbound =
4016          make_immediate_reg_float(emit, -128.0f);
4017       struct tgsi_full_src_register upperbound =
4018          make_immediate_reg_float(emit, 128.0f);
4019 
4020       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
4021                            &lowerbound, FALSE);
4022       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
4023                            &upperbound, FALSE);
4024       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
4025                            &zero, FALSE);
4026 
4027       /* POW tmp1, tmp2, tmp1 */
4028       /* LOG tmp2, tmp2 */
4029       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
4030                            FALSE);
4031 
4032       /* MUL tmp1, tmp2, tmp1 */
4033       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
4034                            &tmp1_src, FALSE);
4035 
4036       /* EXP tmp1, tmp1 */
4037       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
4038                            FALSE);
4039 
4040       /* EQ tmp2, 0, src.w */
4041       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
4042                            &src_wwww, FALSE);
4043       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
4044       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
4045                            &tmp2_src, &one, &tmp1_src, FALSE);
4046 
4047       /* LT tmp2, 0, src.x */
4048       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
4049                            &src_xxxx, FALSE);
4050       /* MOVC dst.z, tmp2, tmp1, 0.0 */
4051       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
4052                            &tmp2_src, &tmp1_src, &zero, FALSE);
4053    }
4054 
4055    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
4056                         FALSE);
4057    free_temp_indexes(emit);
4058 
4059    return TRUE;
4060 }
4061 
4062 
4063 /**
4064  * Emit code for TGSI_OPCODE_LOG instruction.
4065  */
4066 static boolean
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4067 emit_log(struct svga_shader_emitter_v10 *emit,
4068          const struct tgsi_full_instruction *inst)
4069 {
4070    /*
4071     * dst.x = floor(lg2(abs(s0.x)))
4072     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
4073     * dst.z = lg2(abs(s0.x))
4074     * dst.w = 1.0
4075     */
4076 
4077    struct tgsi_full_src_register src_xxxx =
4078       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
4079    unsigned tmp = get_temp_index(emit);
4080    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4081    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4082    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
4083 
4084    /* only use X component of temp reg */
4085    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4086    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4087 
4088    /* LOG tmp.x, abs(s0.x) */
4089    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
4090       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
4091                           &abs_src_xxxx, FALSE);
4092    }
4093 
4094    /* MOV dst.z, tmp.x */
4095    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
4096       struct tgsi_full_dst_register dst_z =
4097          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
4098 
4099       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
4100                            &tmp_src, inst->Instruction.Saturate);
4101    }
4102 
4103    /* FLR tmp.x, tmp.x */
4104    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
4105       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
4106                            &tmp_src, FALSE);
4107    }
4108 
4109    /* MOV dst.x, tmp.x */
4110    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
4111       struct tgsi_full_dst_register dst_x =
4112          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
4113 
4114       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
4115                            inst->Instruction.Saturate);
4116    }
4117 
4118    /* EXP tmp.x, tmp.x */
4119    /* DIV dst.y, abs(s0.x), tmp.x */
4120    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
4121       struct tgsi_full_dst_register dst_y =
4122          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
4123 
4124       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
4125                            FALSE);
4126       emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
4127                            &tmp_src, inst->Instruction.Saturate);
4128    }
4129 
4130    /* MOV dst.w, 1.0 */
4131    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
4132       struct tgsi_full_dst_register dst_w =
4133          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
4134       struct tgsi_full_src_register one =
4135          make_immediate_reg_float(emit, 1.0f);
4136 
4137       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
4138    }
4139 
4140    free_temp_indexes(emit);
4141 
4142    return TRUE;
4143 }
4144 
4145 
4146 /**
4147  * Emit code for TGSI_OPCODE_LRP instruction.
4148  */
4149 static boolean
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4150 emit_lrp(struct svga_shader_emitter_v10 *emit,
4151          const struct tgsi_full_instruction *inst)
4152 {
4153    /* dst = LRP(s0, s1, s2):
4154     *   dst = s0 * (s1 - s2) + s2
4155     * Translates into:
4156     *   SUB tmp, s1, s2;        tmp = s1 - s2
4157     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
4158     */
4159    unsigned tmp = get_temp_index(emit);
4160    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
4161    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
4162    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
4163 
4164    /* ADD tmp, s1, -s2 */
4165    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
4166                         &inst->Src[1], &neg_src2, FALSE);
4167 
4168    /* MAD dst, s1, tmp, s3 */
4169    emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
4170                         &inst->Src[0], &src_tmp, &inst->Src[2],
4171                         inst->Instruction.Saturate);
4172 
4173    free_temp_indexes(emit);
4174 
4175    return TRUE;
4176 }
4177 
4178 
4179 /**
4180  * Emit code for TGSI_OPCODE_POW instruction.
4181  */
4182 static boolean
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4183 emit_pow(struct svga_shader_emitter_v10 *emit,
4184          const struct tgsi_full_instruction *inst)
4185 {
4186    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
4187     * src1.x while VGPU10 computes four values.
4188     *
4189     * dst = POW(src0, src1):
4190     *   dst.xyzw = src0.x ^ src1.x
4191     */
4192    unsigned tmp = get_temp_index(emit);
4193    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4194    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4195    struct tgsi_full_src_register src0_xxxx =
4196       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4197                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4198    struct tgsi_full_src_register src1_xxxx =
4199       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4200                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4201 
4202    /* LOG tmp, s0.xxxx */
4203    emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
4204                         FALSE);
4205 
4206    /* MUL tmp, tmp, s1.xxxx */
4207    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
4208                         &src1_xxxx, FALSE);
4209 
4210    /* EXP tmp, s0.xxxx */
4211    emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
4212                         &tmp_src, inst->Instruction.Saturate);
4213 
4214    /* free tmp */
4215    free_temp_indexes(emit);
4216 
4217    return TRUE;
4218 }
4219 
4220 
4221 /**
4222  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
4223  */
4224 static boolean
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4225 emit_rcp(struct svga_shader_emitter_v10 *emit,
4226          const struct tgsi_full_instruction *inst)
4227 {
4228    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4229 
4230    unsigned tmp = get_temp_index(emit);
4231    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4232    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4233 
4234    struct tgsi_full_dst_register tmp_dst_x =
4235       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4236    struct tgsi_full_src_register tmp_src_xxxx =
4237       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4238 
4239    /* DIV tmp.x, 1.0, s0 */
4240    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
4241                         &inst->Src[0], FALSE);
4242 
4243    /* MOV dst, tmp.xxxx */
4244    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4245                         &tmp_src_xxxx, inst->Instruction.Saturate);
4246 
4247    free_temp_indexes(emit);
4248 
4249    return TRUE;
4250 }
4251 
4252 
4253 /**
4254  * Emit code for TGSI_OPCODE_RSQ instruction.
4255  */
4256 static boolean
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4257 emit_rsq(struct svga_shader_emitter_v10 *emit,
4258          const struct tgsi_full_instruction *inst)
4259 {
4260    /* dst = RSQ(src):
4261     *   dst.xyzw = 1 / sqrt(src.x)
4262     * Translates into:
4263     *   RSQ tmp, src.x
4264     *   MOV dst, tmp.xxxx
4265     */
4266 
4267    unsigned tmp = get_temp_index(emit);
4268    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4269    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4270 
4271    struct tgsi_full_dst_register tmp_dst_x =
4272       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4273    struct tgsi_full_src_register tmp_src_xxxx =
4274       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4275 
4276    /* RSQ tmp, src.x */
4277    emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
4278                         &inst->Src[0], FALSE);
4279 
4280    /* MOV dst, tmp.xxxx */
4281    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4282                         &tmp_src_xxxx, inst->Instruction.Saturate);
4283 
4284    /* free tmp */
4285    free_temp_indexes(emit);
4286 
4287    return TRUE;
4288 }
4289 
4290 
4291 /**
4292  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
4293  */
4294 static boolean
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4295 emit_seq(struct svga_shader_emitter_v10 *emit,
4296          const struct tgsi_full_instruction *inst)
4297 {
4298    /* dst = SEQ(s0, s1):
4299     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
4300     * Translates into:
4301     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
4302     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
4303     */
4304    unsigned tmp = get_temp_index(emit);
4305    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4306    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4307    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4308    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4309 
4310    /* EQ tmp, s0, s1 */
4311    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
4312                         &inst->Src[1], FALSE);
4313 
4314    /* MOVC dst, tmp, one, zero */
4315    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4316                         &one, &zero, FALSE);
4317 
4318    free_temp_indexes(emit);
4319 
4320    return TRUE;
4321 }
4322 
4323 
4324 /**
4325  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
4326  */
4327 static boolean
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4328 emit_sge(struct svga_shader_emitter_v10 *emit,
4329          const struct tgsi_full_instruction *inst)
4330 {
4331    /* dst = SGE(s0, s1):
4332     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
4333     * Translates into:
4334     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
4335     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
4336     */
4337    unsigned tmp = get_temp_index(emit);
4338    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4339    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4340    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4341    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4342 
4343    /* GE tmp, s0, s1 */
4344    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
4345                         &inst->Src[1], FALSE);
4346 
4347    /* MOVC dst, tmp, one, zero */
4348    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4349                         &one, &zero, FALSE);
4350 
4351    free_temp_indexes(emit);
4352 
4353    return TRUE;
4354 }
4355 
4356 
4357 /**
4358  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
4359  */
4360 static boolean
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4361 emit_sgt(struct svga_shader_emitter_v10 *emit,
4362          const struct tgsi_full_instruction *inst)
4363 {
4364    /* dst = SGT(s0, s1):
4365     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
4366     * Translates into:
4367     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
4368     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
4369     */
4370    unsigned tmp = get_temp_index(emit);
4371    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4372    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4373    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4374    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4375 
4376    /* LT tmp, s1, s0 */
4377    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
4378                         &inst->Src[0], FALSE);
4379 
4380    /* MOVC dst, tmp, one, zero */
4381    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4382                         &one, &zero, FALSE);
4383 
4384    free_temp_indexes(emit);
4385 
4386    return TRUE;
4387 }
4388 
4389 
4390 /**
4391  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
4392  */
4393 static boolean
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4394 emit_sincos(struct svga_shader_emitter_v10 *emit,
4395          const struct tgsi_full_instruction *inst)
4396 {
4397    unsigned tmp = get_temp_index(emit);
4398    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4399    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4400 
4401    struct tgsi_full_src_register tmp_src_xxxx =
4402       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4403    struct tgsi_full_dst_register tmp_dst_x =
4404       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4405 
4406    begin_emit_instruction(emit);
4407    emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
4408 
4409    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
4410    {
4411       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
4412       emit_null_dst_register(emit);  /* second destination register */
4413    }
4414    else {
4415       emit_null_dst_register(emit);
4416       emit_dst_register(emit, &tmp_dst_x);
4417    }
4418 
4419    emit_src_register(emit, &inst->Src[0]);
4420    end_emit_instruction(emit);
4421 
4422    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4423                         &tmp_src_xxxx, inst->Instruction.Saturate);
4424 
4425    free_temp_indexes(emit);
4426 
4427    return TRUE;
4428 }
4429 
4430 
4431 /**
4432  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
4433  */
4434 static boolean
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4435 emit_sle(struct svga_shader_emitter_v10 *emit,
4436          const struct tgsi_full_instruction *inst)
4437 {
4438    /* dst = SLE(s0, s1):
4439     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
4440     * Translates into:
4441     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
4442     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
4443     */
4444    unsigned tmp = get_temp_index(emit);
4445    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4446    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4447    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4448    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4449 
4450    /* GE tmp, s1, s0 */
4451    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
4452                         &inst->Src[0], FALSE);
4453 
4454    /* MOVC dst, tmp, one, zero */
4455    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4456                         &one, &zero, FALSE);
4457 
4458    free_temp_indexes(emit);
4459 
4460    return TRUE;
4461 }
4462 
4463 
4464 /**
4465  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
4466  */
4467 static boolean
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4468 emit_slt(struct svga_shader_emitter_v10 *emit,
4469          const struct tgsi_full_instruction *inst)
4470 {
4471    /* dst = SLT(s0, s1):
4472     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
4473     * Translates into:
4474     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
4475     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
4476     */
4477    unsigned tmp = get_temp_index(emit);
4478    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4479    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4480    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4481    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4482 
4483    /* LT tmp, s0, s1 */
4484    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
4485                         &inst->Src[1], FALSE);
4486 
4487    /* MOVC dst, tmp, one, zero */
4488    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4489                         &one, &zero, FALSE);
4490 
4491    free_temp_indexes(emit);
4492 
4493    return TRUE;
4494 }
4495 
4496 
4497 /**
4498  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
4499  */
4500 static boolean
emit_sne(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4501 emit_sne(struct svga_shader_emitter_v10 *emit,
4502          const struct tgsi_full_instruction *inst)
4503 {
4504    /* dst = SNE(s0, s1):
4505     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
4506     * Translates into:
4507     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
4508     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
4509     */
4510    unsigned tmp = get_temp_index(emit);
4511    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4512    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4513    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4514    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4515 
4516    /* NE tmp, s0, s1 */
4517    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
4518                         &inst->Src[1], FALSE);
4519 
4520    /* MOVC dst, tmp, one, zero */
4521    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4522                         &one, &zero, FALSE);
4523 
4524    free_temp_indexes(emit);
4525 
4526    return TRUE;
4527 }
4528 
4529 
4530 /**
4531  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
4532  */
4533 static boolean
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4534 emit_ssg(struct svga_shader_emitter_v10 *emit,
4535          const struct tgsi_full_instruction *inst)
4536 {
4537    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
4538     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
4539     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
4540     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
4541     * Translates into:
4542     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
4543     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
4544     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
4545     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
4546     */
4547    struct tgsi_full_src_register zero =
4548       make_immediate_reg_float(emit, 0.0f);
4549    struct tgsi_full_src_register one =
4550       make_immediate_reg_float(emit, 1.0f);
4551    struct tgsi_full_src_register neg_one =
4552       make_immediate_reg_float(emit, -1.0f);
4553 
4554    unsigned tmp1 = get_temp_index(emit);
4555    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4556    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4557 
4558    unsigned tmp2 = get_temp_index(emit);
4559    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4560    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4561 
4562    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
4563                         &zero, FALSE);
4564    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
4565                         &neg_one, &zero, FALSE);
4566    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
4567                         &inst->Src[0], FALSE);
4568    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
4569                         &one, &tmp2_src, FALSE);
4570 
4571    free_temp_indexes(emit);
4572 
4573    return TRUE;
4574 }
4575 
4576 
4577 /**
4578  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
4579  */
4580 static boolean
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4581 emit_issg(struct svga_shader_emitter_v10 *emit,
4582           const struct tgsi_full_instruction *inst)
4583 {
4584    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
4585     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
4586     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
4587     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
4588     * Translates into:
4589     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
4590     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
4591     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
4592     */
4593    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4594 
4595    unsigned tmp1 = get_temp_index(emit);
4596    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4597    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4598 
4599    unsigned tmp2 = get_temp_index(emit);
4600    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4601    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4602 
4603    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
4604 
4605    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
4606                         &inst->Src[0], &zero, FALSE);
4607    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
4608                         &zero, &inst->Src[0], FALSE);
4609    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
4610                         &tmp1_src, &neg_tmp2, FALSE);
4611 
4612    free_temp_indexes(emit);
4613 
4614    return TRUE;
4615 }
4616 
4617 
4618 /**
4619  * Emit a comparison instruction.  The dest register will get
4620  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
4621  */
4622 static void
emit_comparison(struct svga_shader_emitter_v10 * emit,SVGA3dCmpFunc func,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src0,const struct tgsi_full_src_register * src1)4623 emit_comparison(struct svga_shader_emitter_v10 *emit,
4624                 SVGA3dCmpFunc func,
4625                 const struct tgsi_full_dst_register *dst,
4626                 const struct tgsi_full_src_register *src0,
4627                 const struct tgsi_full_src_register *src1)
4628 {
4629    struct tgsi_full_src_register immediate;
4630    VGPU10OpcodeToken0 opcode0;
4631    boolean swapSrc = FALSE;
4632 
4633    /* Sanity checks for svga vs. gallium enums */
4634    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
4635    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
4636 
4637    opcode0.value = 0;
4638 
4639    switch (func) {
4640    case SVGA3D_CMP_NEVER:
4641       immediate = make_immediate_reg_int(emit, 0);
4642       /* MOV dst, {0} */
4643       begin_emit_instruction(emit);
4644       emit_dword(emit, VGPU10_OPCODE_MOV);
4645       emit_dst_register(emit, dst);
4646       emit_src_register(emit, &immediate);
4647       end_emit_instruction(emit);
4648       return;
4649    case SVGA3D_CMP_ALWAYS:
4650       immediate = make_immediate_reg_int(emit, -1);
4651       /* MOV dst, {-1} */
4652       begin_emit_instruction(emit);
4653       emit_dword(emit, VGPU10_OPCODE_MOV);
4654       emit_dst_register(emit, dst);
4655       emit_src_register(emit, &immediate);
4656       end_emit_instruction(emit);
4657       return;
4658    case SVGA3D_CMP_LESS:
4659       opcode0.opcodeType = VGPU10_OPCODE_LT;
4660       break;
4661    case SVGA3D_CMP_EQUAL:
4662       opcode0.opcodeType = VGPU10_OPCODE_EQ;
4663       break;
4664    case SVGA3D_CMP_LESSEQUAL:
4665       opcode0.opcodeType = VGPU10_OPCODE_GE;
4666       swapSrc = TRUE;
4667       break;
4668    case SVGA3D_CMP_GREATER:
4669       opcode0.opcodeType = VGPU10_OPCODE_LT;
4670       swapSrc = TRUE;
4671       break;
4672    case SVGA3D_CMP_NOTEQUAL:
4673       opcode0.opcodeType = VGPU10_OPCODE_NE;
4674       break;
4675    case SVGA3D_CMP_GREATEREQUAL:
4676       opcode0.opcodeType = VGPU10_OPCODE_GE;
4677       break;
4678    default:
4679       assert(!"Unexpected comparison mode");
4680       opcode0.opcodeType = VGPU10_OPCODE_EQ;
4681    }
4682 
4683    begin_emit_instruction(emit);
4684    emit_dword(emit, opcode0.value);
4685    emit_dst_register(emit, dst);
4686    if (swapSrc) {
4687       emit_src_register(emit, src1);
4688       emit_src_register(emit, src0);
4689    }
4690    else {
4691       emit_src_register(emit, src0);
4692       emit_src_register(emit, src1);
4693    }
4694    end_emit_instruction(emit);
4695 }
4696 
4697 
4698 /**
4699  * Get texel/address offsets for a texture instruction.
4700  */
4701 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])4702 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
4703                   const struct tgsi_full_instruction *inst, int offsets[3])
4704 {
4705    if (inst->Texture.NumOffsets == 1) {
4706       /* According to OpenGL Shader Language spec the offsets are only
4707        * fetched from a previously-declared immediate/literal.
4708        */
4709       const struct tgsi_texture_offset *off = inst->TexOffsets;
4710       const unsigned index = off[0].Index;
4711       const unsigned swizzleX = off[0].SwizzleX;
4712       const unsigned swizzleY = off[0].SwizzleY;
4713       const unsigned swizzleZ = off[0].SwizzleZ;
4714       const union tgsi_immediate_data *imm = emit->immediates[index];
4715 
4716       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
4717 
4718       offsets[0] = imm[swizzleX].Int;
4719       offsets[1] = imm[swizzleY].Int;
4720       offsets[2] = imm[swizzleZ].Int;
4721    }
4722    else {
4723       offsets[0] = offsets[1] = offsets[2] = 0;
4724    }
4725 }
4726 
4727 
4728 /**
4729  * Set up the coordinate register for texture sampling.
4730  * When we're sampling from a RECT texture we have to scale the
4731  * unnormalized coordinate to a normalized coordinate.
4732  * We do that by multiplying the coordinate by an "extra" constant.
4733  * An alternative would be to use the RESINFO instruction to query the
4734  * texture's size.
4735  */
4736 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)4737 setup_texcoord(struct svga_shader_emitter_v10 *emit,
4738                unsigned unit,
4739                const struct tgsi_full_src_register *coord)
4740 {
4741    if (emit->key.tex[unit].unnormalized) {
4742       unsigned scale_index = emit->texcoord_scale_index[unit];
4743       unsigned tmp = get_temp_index(emit);
4744       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4745       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4746       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
4747 
4748       if (emit->key.tex[unit].texel_bias) {
4749          /* to fix texture coordinate rounding issue, 0.0001 offset is
4750           * been added. This fixes piglit test fbo-blit-scaled-linear. */
4751          struct tgsi_full_src_register offset =
4752             make_immediate_reg_float(emit, 0.0001f);
4753 
4754          /* ADD tmp, coord, offset */
4755          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
4756                               coord, &offset, FALSE);
4757          /* MUL tmp, tmp, scale */
4758          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
4759                               &tmp_src, &scale_src, FALSE);
4760       }
4761       else {
4762          /* MUL tmp, coord, const[] */
4763          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
4764                               coord, &scale_src, FALSE);
4765       }
4766       return tmp_src;
4767    }
4768    else {
4769       /* use texcoord as-is */
4770       return *coord;
4771    }
4772 }
4773 
4774 
4775 /**
4776  * For SAMPLE_C instructions, emit the extra src register which indicates
4777  * the reference/comparision value.
4778  */
4779 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)4780 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
4781                           enum tgsi_texture_type target,
4782                           const struct tgsi_full_src_register *coord)
4783 {
4784    struct tgsi_full_src_register coord_src_ref;
4785    int component;
4786 
4787    assert(tgsi_is_shadow_target(target));
4788 
4789    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
4790    assert(component >= 0);
4791 
4792    coord_src_ref = scalar_src(coord, component);
4793 
4794    emit_src_register(emit, &coord_src_ref);
4795 }
4796 
4797 
4798 /**
4799  * Info for implementing texture swizzles.
4800  * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
4801  * functions use this to encapsulate the extra steps needed to perform
4802  * a texture swizzle, or shadow/depth comparisons.
4803  * The shadow/depth comparison is only done here if for the cases where
4804  * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
4805  */
4806 struct tex_swizzle_info
4807 {
4808    boolean swizzled;
4809    boolean shadow_compare;
4810    unsigned unit;
4811    enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
4812    struct tgsi_full_src_register tmp_src;
4813    struct tgsi_full_dst_register tmp_dst;
4814    const struct tgsi_full_dst_register *inst_dst;
4815    const struct tgsi_full_src_register *coord_src;
4816 };
4817 
4818 
4819 /**
4820  * Do setup for handling texture swizzles or shadow compares.
4821  * \param unit  the texture unit
4822  * \param inst  the TGSI texture instruction
4823  * \param shadow_compare  do shadow/depth comparison?
4824  * \param swz  returns the swizzle info
4825  */
4826 static void
begin_tex_swizzle(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_instruction * inst,boolean shadow_compare,struct tex_swizzle_info * swz)4827 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
4828                   unsigned unit,
4829                   const struct tgsi_full_instruction *inst,
4830                   boolean shadow_compare,
4831                   struct tex_swizzle_info *swz)
4832 {
4833    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
4834                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
4835                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
4836                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
4837 
4838    swz->shadow_compare = shadow_compare;
4839    swz->texture_target = inst->Texture.Texture;
4840 
4841    if (swz->swizzled || shadow_compare) {
4842       /* Allocate temp register for the result of the SAMPLE instruction
4843        * and the source of the MOV/compare/swizzle instructions.
4844        */
4845       unsigned tmp = get_temp_index(emit);
4846       swz->tmp_src = make_src_temp_reg(tmp);
4847       swz->tmp_dst = make_dst_temp_reg(tmp);
4848 
4849       swz->unit = unit;
4850    }
4851    swz->inst_dst = &inst->Dst[0];
4852    swz->coord_src = &inst->Src[0];
4853 
4854    emit->fs.shadow_compare_units |= shadow_compare << unit;
4855 }
4856 
4857 
4858 /**
4859  * Returns the register to put the SAMPLE instruction results into.
4860  * This will either be the original instruction dst reg (if no swizzle
4861  * and no shadow comparison) or a temporary reg if there is a swizzle.
4862  */
4863 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)4864 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
4865 {
4866    return (swz->swizzled || swz->shadow_compare)
4867       ? &swz->tmp_dst : swz->inst_dst;
4868 }
4869 
4870 
4871 /**
4872  * This emits the MOV instruction that actually implements a texture swizzle
4873  * and/or shadow comparison.
4874  */
4875 static void
end_tex_swizzle(struct svga_shader_emitter_v10 * emit,const struct tex_swizzle_info * swz)4876 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
4877                 const struct tex_swizzle_info *swz)
4878 {
4879    if (swz->shadow_compare) {
4880       /* Emit extra instructions to compare the fetched texel value against
4881        * a texture coordinate component.  The result of the comparison
4882        * is 0.0 or 1.0.
4883        */
4884       struct tgsi_full_src_register coord_src;
4885       struct tgsi_full_src_register texel_src =
4886          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
4887       struct tgsi_full_src_register one =
4888          make_immediate_reg_float(emit, 1.0f);
4889       /* convert gallium comparison func to SVGA comparison func */
4890       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
4891 
4892       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4893 
4894       int component =
4895          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
4896       assert(component >= 0);
4897       coord_src = scalar_src(swz->coord_src, component);
4898 
4899       /* COMPARE tmp, coord, texel */
4900       emit_comparison(emit, compare_func,
4901                       &swz->tmp_dst, &coord_src, &texel_src);
4902 
4903       /* AND dest, tmp, {1.0} */
4904       begin_emit_instruction(emit);
4905       emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
4906       if (swz->swizzled) {
4907          emit_dst_register(emit, &swz->tmp_dst);
4908       }
4909       else {
4910          emit_dst_register(emit, swz->inst_dst);
4911       }
4912       emit_src_register(emit, &swz->tmp_src);
4913       emit_src_register(emit, &one);
4914       end_emit_instruction(emit);
4915    }
4916 
4917    if (swz->swizzled) {
4918       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
4919       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
4920       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
4921       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
4922       unsigned writemask_0 = 0, writemask_1 = 0;
4923       boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
4924 
4925       /* Swizzle w/out zero/one terms */
4926       struct tgsi_full_src_register src_swizzled =
4927          swizzle_src(&swz->tmp_src,
4928                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
4929                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
4930                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
4931                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
4932 
4933       /* MOV dst, color(tmp).<swizzle> */
4934       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
4935                            swz->inst_dst, &src_swizzled, FALSE);
4936 
4937       /* handle swizzle zero terms */
4938       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
4939                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
4940                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
4941                      ((swz_a == PIPE_SWIZZLE_0) << 3));
4942       writemask_0 &= swz->inst_dst->Register.WriteMask;
4943 
4944       if (writemask_0) {
4945          struct tgsi_full_src_register zero = int_tex ?
4946             make_immediate_reg_int(emit, 0) :
4947             make_immediate_reg_float(emit, 0.0f);
4948          struct tgsi_full_dst_register dst =
4949             writemask_dst(swz->inst_dst, writemask_0);
4950 
4951          /* MOV dst.writemask_0, {0,0,0,0} */
4952          emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
4953                               &dst, &zero, FALSE);
4954       }
4955 
4956       /* handle swizzle one terms */
4957       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
4958                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
4959                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
4960                      ((swz_a == PIPE_SWIZZLE_1) << 3));
4961       writemask_1 &= swz->inst_dst->Register.WriteMask;
4962 
4963       if (writemask_1) {
4964          struct tgsi_full_src_register one = int_tex ?
4965             make_immediate_reg_int(emit, 1) :
4966             make_immediate_reg_float(emit, 1.0f);
4967          struct tgsi_full_dst_register dst =
4968             writemask_dst(swz->inst_dst, writemask_1);
4969 
4970          /* MOV dst.writemask_1, {1,1,1,1} */
4971          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
4972       }
4973    }
4974 }
4975 
4976 
4977 /**
4978  * Emit code for TGSI_OPCODE_SAMPLE instruction.
4979  */
4980 static boolean
emit_sample(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)4981 emit_sample(struct svga_shader_emitter_v10 *emit,
4982             const struct tgsi_full_instruction *inst)
4983 {
4984    const unsigned resource_unit = inst->Src[1].Register.Index;
4985    const unsigned sampler_unit = inst->Src[2].Register.Index;
4986    struct tgsi_full_src_register coord;
4987    int offsets[3];
4988    struct tex_swizzle_info swz_info;
4989 
4990    begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
4991 
4992    get_texel_offsets(emit, inst, offsets);
4993 
4994    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
4995 
4996    /* SAMPLE dst, coord(s0), resource, sampler */
4997    begin_emit_instruction(emit);
4998 
4999    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
5000     * with LOD=0.  But our virtual GPU accepts this as-is.
5001     */
5002    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
5003                       inst->Instruction.Saturate, offsets);
5004    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5005    emit_src_register(emit, &coord);
5006    emit_resource_register(emit, resource_unit);
5007    emit_sampler_register(emit, sampler_unit);
5008    end_emit_instruction(emit);
5009 
5010    end_tex_swizzle(emit, &swz_info);
5011 
5012    free_temp_indexes(emit);
5013 
5014    return TRUE;
5015 }
5016 
5017 
5018 /**
5019  * Check if a texture instruction is valid.
5020  * An example of an invalid texture instruction is doing shadow comparison
5021  * with an integer-valued texture.
5022  * If we detect an invalid texture instruction, we replace it with:
5023  *   MOV dst, {1,1,1,1};
5024  * \return TRUE if valid, FALSE if invalid.
5025  */
5026 static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5027 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
5028                          const struct tgsi_full_instruction *inst)
5029 {
5030    const unsigned unit = inst->Src[1].Register.Index;
5031    const enum tgsi_texture_type target = inst->Texture.Texture;
5032    boolean valid = TRUE;
5033 
5034    if (tgsi_is_shadow_target(target) &&
5035        is_integer_type(emit->sampler_return_type[unit])) {
5036       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
5037       valid = FALSE;
5038    }
5039    /* XXX might check for other conditions in the future here */
5040 
5041    if (!valid) {
5042       /* emit a MOV dst, {1,1,1,1} instruction. */
5043       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
5044       begin_emit_instruction(emit);
5045       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5046       emit_dst_register(emit, &inst->Dst[0]);
5047       emit_src_register(emit, &one);
5048       end_emit_instruction(emit);
5049    }
5050 
5051    return valid;
5052 }
5053 
5054 
5055 /**
5056  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
5057  */
5058 static boolean
emit_tex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5059 emit_tex(struct svga_shader_emitter_v10 *emit,
5060          const struct tgsi_full_instruction *inst)
5061 {
5062    const uint unit = inst->Src[1].Register.Index;
5063    const enum tgsi_texture_type target = inst->Texture.Texture;
5064    unsigned opcode;
5065    struct tgsi_full_src_register coord;
5066    int offsets[3];
5067    struct tex_swizzle_info swz_info;
5068 
5069    /* check that the sampler returns a float */
5070    if (!is_valid_tex_instruction(emit, inst))
5071       return TRUE;
5072 
5073    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5074 
5075    get_texel_offsets(emit, inst, offsets);
5076 
5077    coord = setup_texcoord(emit, unit, &inst->Src[0]);
5078 
5079    /* SAMPLE dst, coord(s0), resource, sampler */
5080    begin_emit_instruction(emit);
5081 
5082    if (tgsi_is_shadow_target(target))
5083       opcode = VGPU10_OPCODE_SAMPLE_C;
5084    else
5085       opcode = VGPU10_OPCODE_SAMPLE;
5086 
5087    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5088    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5089    emit_src_register(emit, &coord);
5090    emit_resource_register(emit, unit);
5091    emit_sampler_register(emit, unit);
5092    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
5093       emit_tex_compare_refcoord(emit, target, &coord);
5094    }
5095    end_emit_instruction(emit);
5096 
5097    end_tex_swizzle(emit, &swz_info);
5098 
5099    free_temp_indexes(emit);
5100 
5101    return TRUE;
5102 }
5103 
5104 
5105 /**
5106  * Emit code for TGSI_OPCODE_TXP (projective texture)
5107  */
5108 static boolean
emit_txp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5109 emit_txp(struct svga_shader_emitter_v10 *emit,
5110          const struct tgsi_full_instruction *inst)
5111 {
5112    const uint unit = inst->Src[1].Register.Index;
5113    const enum tgsi_texture_type target = inst->Texture.Texture;
5114    unsigned opcode;
5115    int offsets[3];
5116    unsigned tmp = get_temp_index(emit);
5117    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
5118    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
5119    struct tgsi_full_src_register src0_wwww =
5120       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5121    struct tgsi_full_src_register coord;
5122    struct tex_swizzle_info swz_info;
5123 
5124    /* check that the sampler returns a float */
5125    if (!is_valid_tex_instruction(emit, inst))
5126       return TRUE;
5127 
5128    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5129 
5130    get_texel_offsets(emit, inst, offsets);
5131 
5132    coord = setup_texcoord(emit, unit, &inst->Src[0]);
5133 
5134    /* DIV tmp, coord, coord.wwww */
5135    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
5136                         &coord, &src0_wwww, FALSE);
5137 
5138    /* SAMPLE dst, coord(tmp), resource, sampler */
5139    begin_emit_instruction(emit);
5140 
5141    if (tgsi_is_shadow_target(target))
5142       /* NOTE: for non-fragment shaders, we should use
5143        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
5144        */
5145       opcode = VGPU10_OPCODE_SAMPLE_C;
5146    else
5147       opcode = VGPU10_OPCODE_SAMPLE;
5148 
5149    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5150    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5151    emit_src_register(emit, &tmp_src);  /* projected coord */
5152    emit_resource_register(emit, unit);
5153    emit_sampler_register(emit, unit);
5154    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
5155       emit_tex_compare_refcoord(emit, target, &tmp_src);
5156    }
5157    end_emit_instruction(emit);
5158 
5159    end_tex_swizzle(emit, &swz_info);
5160 
5161    free_temp_indexes(emit);
5162 
5163    return TRUE;
5164 }
5165 
5166 
5167 /**
5168  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
5169  */
5170 static boolean
emit_txd(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5171 emit_txd(struct svga_shader_emitter_v10 *emit,
5172          const struct tgsi_full_instruction *inst)
5173 {
5174    const uint unit = inst->Src[3].Register.Index;
5175    const enum tgsi_texture_type target = inst->Texture.Texture;
5176    int offsets[3];
5177    struct tgsi_full_src_register coord;
5178    struct tex_swizzle_info swz_info;
5179 
5180    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
5181                      &swz_info);
5182 
5183    get_texel_offsets(emit, inst, offsets);
5184 
5185    coord = setup_texcoord(emit, unit, &inst->Src[0]);
5186 
5187    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
5188    begin_emit_instruction(emit);
5189    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
5190                       inst->Instruction.Saturate, offsets);
5191    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5192    emit_src_register(emit, &coord);
5193    emit_resource_register(emit, unit);
5194    emit_sampler_register(emit, unit);
5195    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
5196    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
5197    end_emit_instruction(emit);
5198 
5199    end_tex_swizzle(emit, &swz_info);
5200 
5201    free_temp_indexes(emit);
5202 
5203    return TRUE;
5204 }
5205 
5206 
5207 /**
5208  * Emit code for TGSI_OPCODE_TXF (texel fetch)
5209  */
5210 static boolean
emit_txf(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5211 emit_txf(struct svga_shader_emitter_v10 *emit,
5212          const struct tgsi_full_instruction *inst)
5213 {
5214    const uint unit = inst->Src[1].Register.Index;
5215    const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
5216    int offsets[3];
5217    struct tex_swizzle_info swz_info;
5218 
5219    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5220 
5221    get_texel_offsets(emit, inst, offsets);
5222 
5223    if (msaa) {
5224       /* Fetch one sample from an MSAA texture */
5225       struct tgsi_full_src_register sampleIndex =
5226          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5227       /* LD_MS dst, coord(s0), resource, sampleIndex */
5228       begin_emit_instruction(emit);
5229       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
5230                          inst->Instruction.Saturate, offsets);
5231       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5232       emit_src_register(emit, &inst->Src[0]);
5233       emit_resource_register(emit, unit);
5234       emit_src_register(emit, &sampleIndex);
5235       end_emit_instruction(emit);
5236    }
5237    else {
5238       /* Fetch one texel specified by integer coordinate */
5239       /* LD dst, coord(s0), resource */
5240       begin_emit_instruction(emit);
5241       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
5242                          inst->Instruction.Saturate, offsets);
5243       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5244       emit_src_register(emit, &inst->Src[0]);
5245       emit_resource_register(emit, unit);
5246       end_emit_instruction(emit);
5247    }
5248 
5249    end_tex_swizzle(emit, &swz_info);
5250 
5251    free_temp_indexes(emit);
5252 
5253    return TRUE;
5254 }
5255 
5256 
5257 /**
5258  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
5259  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
5260  */
5261 static boolean
emit_txl_txb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5262 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
5263              const struct tgsi_full_instruction *inst)
5264 {
5265    const enum tgsi_texture_type target = inst->Texture.Texture;
5266    unsigned opcode, unit;
5267    int offsets[3];
5268    struct tgsi_full_src_register coord, lod_bias;
5269    struct tex_swizzle_info swz_info;
5270 
5271    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
5272           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
5273           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
5274 
5275    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
5276       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
5277       unit = inst->Src[2].Register.Index;
5278    }
5279    else {
5280       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5281       unit = inst->Src[1].Register.Index;
5282    }
5283 
5284    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
5285                      &swz_info);
5286 
5287    get_texel_offsets(emit, inst, offsets);
5288 
5289    coord = setup_texcoord(emit, unit, &inst->Src[0]);
5290 
5291    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
5292    begin_emit_instruction(emit);
5293    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
5294       opcode = VGPU10_OPCODE_SAMPLE_L;
5295    }
5296    else {
5297       opcode = VGPU10_OPCODE_SAMPLE_B;
5298    }
5299    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5300    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5301    emit_src_register(emit, &coord);
5302    emit_resource_register(emit, unit);
5303    emit_sampler_register(emit, unit);
5304    emit_src_register(emit, &lod_bias);
5305    end_emit_instruction(emit);
5306 
5307    end_tex_swizzle(emit, &swz_info);
5308 
5309    free_temp_indexes(emit);
5310 
5311    return TRUE;
5312 }
5313 
5314 
5315 /**
5316  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
5317  */
5318 static boolean
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5319 emit_txq(struct svga_shader_emitter_v10 *emit,
5320          const struct tgsi_full_instruction *inst)
5321 {
5322    const uint unit = inst->Src[1].Register.Index;
5323 
5324    if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
5325       /* RESINFO does not support querying texture buffers, so we instead
5326        * store texture buffer sizes in shader constants, then copy them to
5327        * implement TXQ instead of emitting RESINFO.
5328        * MOV dst, const[texture_buffer_size_index[unit]]
5329        */
5330       struct tgsi_full_src_register size_src =
5331          make_src_const_reg(emit->texture_buffer_size_index[unit]);
5332       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
5333                            FALSE);
5334    } else {
5335       /* RESINFO dst, srcMipLevel, resource */
5336       begin_emit_instruction(emit);
5337       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
5338       emit_dst_register(emit, &inst->Dst[0]);
5339       emit_src_register(emit, &inst->Src[0]);
5340       emit_resource_register(emit, unit);
5341       end_emit_instruction(emit);
5342    }
5343 
5344    free_temp_indexes(emit);
5345 
5346    return TRUE;
5347 }
5348 
5349 
5350 /**
5351  * Emit a simple instruction (like ADD, MUL, MIN, etc).
5352  */
5353 static boolean
emit_simple(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5354 emit_simple(struct svga_shader_emitter_v10 *emit,
5355             const struct tgsi_full_instruction *inst)
5356 {
5357    const unsigned opcode = inst->Instruction.Opcode;
5358    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
5359    unsigned i;
5360 
5361    begin_emit_instruction(emit);
5362    emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
5363                inst->Instruction.Saturate);
5364    for (i = 0; i < op->num_dst; i++) {
5365       emit_dst_register(emit, &inst->Dst[i]);
5366    }
5367    for (i = 0; i < op->num_src; i++) {
5368       emit_src_register(emit, &inst->Src[i]);
5369    }
5370    end_emit_instruction(emit);
5371 
5372    return TRUE;
5373 }
5374 
5375 
5376 /**
5377  * We only special case the MOV instruction to try to detect constant
5378  * color writes in the fragment shader.
5379  */
5380 static boolean
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5381 emit_mov(struct svga_shader_emitter_v10 *emit,
5382          const struct tgsi_full_instruction *inst)
5383 {
5384    const struct tgsi_full_src_register *src = &inst->Src[0];
5385    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
5386 
5387    if (emit->unit == PIPE_SHADER_FRAGMENT &&
5388        dst->Register.File == TGSI_FILE_OUTPUT &&
5389        dst->Register.Index == 0 &&
5390        src->Register.File == TGSI_FILE_CONSTANT &&
5391        !src->Register.Indirect) {
5392       emit->constant_color_output = TRUE;
5393    }
5394 
5395    return emit_simple(emit, inst);
5396 }
5397 
5398 
5399 /**
5400  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
5401  * where TGSI only uses one dest register.
5402  */
5403 static boolean
emit_simple_1dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned dst_count,unsigned dst_index)5404 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
5405                  const struct tgsi_full_instruction *inst,
5406                  unsigned dst_count,
5407                  unsigned dst_index)
5408 {
5409    const unsigned opcode = inst->Instruction.Opcode;
5410    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
5411    unsigned i;
5412 
5413    begin_emit_instruction(emit);
5414    emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
5415                inst->Instruction.Saturate);
5416 
5417    for (i = 0; i < dst_count; i++) {
5418       if (i == dst_index) {
5419          emit_dst_register(emit, &inst->Dst[0]);
5420       } else {
5421          emit_null_dst_register(emit);
5422       }
5423    }
5424 
5425    for (i = 0; i < op->num_src; i++) {
5426       emit_src_register(emit, &inst->Src[i]);
5427    }
5428    end_emit_instruction(emit);
5429 
5430    return TRUE;
5431 }
5432 
5433 
5434 /**
5435  * Translate a single TGSI instruction to VGPU10.
5436  */
5437 static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)5438 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
5439                         unsigned inst_number,
5440                         const struct tgsi_full_instruction *inst)
5441 {
5442    const unsigned opcode = inst->Instruction.Opcode;
5443 
5444    switch (opcode) {
5445    case TGSI_OPCODE_ADD:
5446    case TGSI_OPCODE_AND:
5447    case TGSI_OPCODE_BGNLOOP:
5448    case TGSI_OPCODE_BRK:
5449    case TGSI_OPCODE_CEIL:
5450    case TGSI_OPCODE_CONT:
5451    case TGSI_OPCODE_DDX:
5452    case TGSI_OPCODE_DDY:
5453    case TGSI_OPCODE_DIV:
5454    case TGSI_OPCODE_DP2:
5455    case TGSI_OPCODE_DP3:
5456    case TGSI_OPCODE_DP4:
5457    case TGSI_OPCODE_ELSE:
5458    case TGSI_OPCODE_ENDIF:
5459    case TGSI_OPCODE_ENDLOOP:
5460    case TGSI_OPCODE_ENDSUB:
5461    case TGSI_OPCODE_F2I:
5462    case TGSI_OPCODE_F2U:
5463    case TGSI_OPCODE_FLR:
5464    case TGSI_OPCODE_FRC:
5465    case TGSI_OPCODE_FSEQ:
5466    case TGSI_OPCODE_FSGE:
5467    case TGSI_OPCODE_FSLT:
5468    case TGSI_OPCODE_FSNE:
5469    case TGSI_OPCODE_I2F:
5470    case TGSI_OPCODE_IMAX:
5471    case TGSI_OPCODE_IMIN:
5472    case TGSI_OPCODE_INEG:
5473    case TGSI_OPCODE_ISGE:
5474    case TGSI_OPCODE_ISHR:
5475    case TGSI_OPCODE_ISLT:
5476    case TGSI_OPCODE_MAD:
5477    case TGSI_OPCODE_MAX:
5478    case TGSI_OPCODE_MIN:
5479    case TGSI_OPCODE_MUL:
5480    case TGSI_OPCODE_NOP:
5481    case TGSI_OPCODE_NOT:
5482    case TGSI_OPCODE_OR:
5483    case TGSI_OPCODE_RET:
5484    case TGSI_OPCODE_UADD:
5485    case TGSI_OPCODE_USEQ:
5486    case TGSI_OPCODE_USGE:
5487    case TGSI_OPCODE_USLT:
5488    case TGSI_OPCODE_UMIN:
5489    case TGSI_OPCODE_UMAD:
5490    case TGSI_OPCODE_UMAX:
5491    case TGSI_OPCODE_ROUND:
5492    case TGSI_OPCODE_SQRT:
5493    case TGSI_OPCODE_SHL:
5494    case TGSI_OPCODE_TRUNC:
5495    case TGSI_OPCODE_U2F:
5496    case TGSI_OPCODE_UCMP:
5497    case TGSI_OPCODE_USHR:
5498    case TGSI_OPCODE_USNE:
5499    case TGSI_OPCODE_XOR:
5500       /* simple instructions */
5501       return emit_simple(emit, inst);
5502 
5503    case TGSI_OPCODE_MOV:
5504       return emit_mov(emit, inst);
5505    case TGSI_OPCODE_EMIT:
5506       return emit_vertex(emit, inst);
5507    case TGSI_OPCODE_ENDPRIM:
5508       return emit_endprim(emit, inst);
5509    case TGSI_OPCODE_IABS:
5510       return emit_iabs(emit, inst);
5511    case TGSI_OPCODE_ARL:
5512       /* fall-through */
5513    case TGSI_OPCODE_UARL:
5514       return emit_arl_uarl(emit, inst);
5515    case TGSI_OPCODE_BGNSUB:
5516       /* no-op */
5517       return TRUE;
5518    case TGSI_OPCODE_CAL:
5519       return emit_cal(emit, inst);
5520    case TGSI_OPCODE_CMP:
5521       return emit_cmp(emit, inst);
5522    case TGSI_OPCODE_COS:
5523       return emit_sincos(emit, inst);
5524    case TGSI_OPCODE_DST:
5525       return emit_dst(emit, inst);
5526    case TGSI_OPCODE_EX2:
5527       return emit_ex2(emit, inst);
5528    case TGSI_OPCODE_EXP:
5529       return emit_exp(emit, inst);
5530    case TGSI_OPCODE_IF:
5531       return emit_if(emit, inst);
5532    case TGSI_OPCODE_KILL:
5533       return emit_kill(emit, inst);
5534    case TGSI_OPCODE_KILL_IF:
5535       return emit_kill_if(emit, inst);
5536    case TGSI_OPCODE_LG2:
5537       return emit_lg2(emit, inst);
5538    case TGSI_OPCODE_LIT:
5539       return emit_lit(emit, inst);
5540    case TGSI_OPCODE_LOG:
5541       return emit_log(emit, inst);
5542    case TGSI_OPCODE_LRP:
5543       return emit_lrp(emit, inst);
5544    case TGSI_OPCODE_POW:
5545       return emit_pow(emit, inst);
5546    case TGSI_OPCODE_RCP:
5547       return emit_rcp(emit, inst);
5548    case TGSI_OPCODE_RSQ:
5549       return emit_rsq(emit, inst);
5550    case TGSI_OPCODE_SAMPLE:
5551       return emit_sample(emit, inst);
5552    case TGSI_OPCODE_SEQ:
5553       return emit_seq(emit, inst);
5554    case TGSI_OPCODE_SGE:
5555       return emit_sge(emit, inst);
5556    case TGSI_OPCODE_SGT:
5557       return emit_sgt(emit, inst);
5558    case TGSI_OPCODE_SIN:
5559       return emit_sincos(emit, inst);
5560    case TGSI_OPCODE_SLE:
5561       return emit_sle(emit, inst);
5562    case TGSI_OPCODE_SLT:
5563       return emit_slt(emit, inst);
5564    case TGSI_OPCODE_SNE:
5565       return emit_sne(emit, inst);
5566    case TGSI_OPCODE_SSG:
5567       return emit_ssg(emit, inst);
5568    case TGSI_OPCODE_ISSG:
5569       return emit_issg(emit, inst);
5570    case TGSI_OPCODE_TEX:
5571       return emit_tex(emit, inst);
5572    case TGSI_OPCODE_TXP:
5573       return emit_txp(emit, inst);
5574    case TGSI_OPCODE_TXB:
5575    case TGSI_OPCODE_TXB2:
5576    case TGSI_OPCODE_TXL:
5577       return emit_txl_txb(emit, inst);
5578    case TGSI_OPCODE_TXD:
5579       return emit_txd(emit, inst);
5580    case TGSI_OPCODE_TXF:
5581       return emit_txf(emit, inst);
5582    case TGSI_OPCODE_TXQ:
5583       return emit_txq(emit, inst);
5584    case TGSI_OPCODE_UIF:
5585       return emit_if(emit, inst);
5586    case TGSI_OPCODE_UMUL_HI:
5587    case TGSI_OPCODE_IMUL_HI:
5588    case TGSI_OPCODE_UDIV:
5589    case TGSI_OPCODE_IDIV:
5590       /* These cases use only the FIRST of two destination registers */
5591       return emit_simple_1dst(emit, inst, 2, 0);
5592    case TGSI_OPCODE_UMUL:
5593    case TGSI_OPCODE_UMOD:
5594    case TGSI_OPCODE_MOD:
5595       /* These cases use only the SECOND of two destination registers */
5596       return emit_simple_1dst(emit, inst, 2, 1);
5597    case TGSI_OPCODE_END:
5598       if (!emit_post_helpers(emit))
5599          return FALSE;
5600       return emit_simple(emit, inst);
5601 
5602    default:
5603       debug_printf("Unimplemented tgsi instruction %s\n",
5604                    tgsi_get_opcode_name(opcode));
5605       return FALSE;
5606    }
5607 
5608    return TRUE;
5609 }
5610 
5611 
5612 /**
5613  * Emit the extra instructions to adjust the vertex position.
5614  * There are two possible adjustments:
5615  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
5616  *    "prescale" and "pretranslate" values.
5617  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
5618  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
5619  */
5620 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit,unsigned vs_pos_tmp_index)5621 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
5622                        unsigned vs_pos_tmp_index)
5623 {
5624    struct tgsi_full_src_register tmp_pos_src;
5625    struct tgsi_full_dst_register pos_dst;
5626 
5627    /* Don't bother to emit any extra vertex instructions if vertex position is
5628     * not written out
5629     */
5630    if (emit->vposition.out_index == INVALID_INDEX)
5631       return;
5632 
5633    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
5634    pos_dst = make_dst_output_reg(emit->vposition.out_index);
5635 
5636    /* If non-adjusted vertex position register index
5637     * is valid, copy the vertex position from the temporary
5638     * vertex position register before it is modified by the
5639     * prescale computation.
5640     */
5641    if (emit->vposition.so_index != INVALID_INDEX) {
5642       struct tgsi_full_dst_register pos_so_dst =
5643          make_dst_output_reg(emit->vposition.so_index);
5644 
5645       /* MOV pos_so, tmp_pos */
5646       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
5647                            &tmp_pos_src, FALSE);
5648    }
5649 
5650    if (emit->vposition.need_prescale) {
5651       /* This code adjusts the vertex position to match the VGPU10 convention.
5652        * If p is the position computed by the shader (usually by applying the
5653        * modelview and projection matrices), the new position q is computed by:
5654        *
5655        * q.x = p.w * trans.x + p.x * scale.x
5656        * q.y = p.w * trans.y + p.y * scale.y
5657        * q.z = p.w * trans.z + p.z * scale.z;
5658        * q.w = p.w * trans.w + p.w;
5659        */
5660       struct tgsi_full_src_register tmp_pos_src_w =
5661          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
5662       struct tgsi_full_dst_register tmp_pos_dst =
5663          make_dst_temp_reg(vs_pos_tmp_index);
5664       struct tgsi_full_dst_register tmp_pos_dst_xyz =
5665          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
5666 
5667       struct tgsi_full_src_register prescale_scale =
5668          make_src_const_reg(emit->vposition.prescale_scale_index);
5669       struct tgsi_full_src_register prescale_trans =
5670          make_src_const_reg(emit->vposition.prescale_trans_index);
5671 
5672       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
5673       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
5674                            &tmp_pos_src, &prescale_scale, FALSE);
5675 
5676       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
5677       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
5678                            &prescale_trans, &tmp_pos_src, FALSE);
5679    }
5680    else if (emit->key.vs.undo_viewport) {
5681       /* This code computes the final vertex position from the temporary
5682        * vertex position by undoing the viewport transformation and the
5683        * divide-by-W operation (we convert window coords back to clip coords).
5684        * This is needed when we use the 'draw' module for fallbacks.
5685        * If p is the temp pos in window coords, then the NDC coord q is:
5686        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
5687        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
5688        *   q.z = p.z * p.w
5689        *   q.w = p.w
5690        * CONST[vs_viewport_index] contains:
5691        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
5692        */
5693       struct tgsi_full_dst_register tmp_pos_dst =
5694          make_dst_temp_reg(vs_pos_tmp_index);
5695       struct tgsi_full_dst_register tmp_pos_dst_xy =
5696          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
5697       struct tgsi_full_src_register tmp_pos_src_wwww =
5698          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
5699 
5700       struct tgsi_full_dst_register pos_dst_xyz =
5701          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
5702       struct tgsi_full_dst_register pos_dst_w =
5703          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
5704 
5705       struct tgsi_full_src_register vp_xyzw =
5706          make_src_const_reg(emit->vs.viewport_index);
5707       struct tgsi_full_src_register vp_zwww =
5708          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
5709                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
5710 
5711       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
5712       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
5713                            &tmp_pos_src, &vp_zwww, FALSE);
5714 
5715       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
5716       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
5717                            &tmp_pos_src, &vp_xyzw, FALSE);
5718 
5719       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
5720       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
5721                            &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
5722 
5723       /* MOV pos.w, tmp_pos.w */
5724       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
5725                            &tmp_pos_src, FALSE);
5726    }
5727    else if (vs_pos_tmp_index != INVALID_INDEX) {
5728       /* This code is to handle the case where the temporary vertex
5729        * position register is created when the vertex shader has stream
5730        * output and prescale is disabled because rasterization is to be
5731        * discarded.
5732        */
5733       struct tgsi_full_dst_register pos_dst =
5734          make_dst_output_reg(emit->vposition.out_index);
5735 
5736       /* MOV pos, tmp_pos */
5737       begin_emit_instruction(emit);
5738       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5739       emit_dst_register(emit, &pos_dst);
5740       emit_src_register(emit, &tmp_pos_src);
5741       end_emit_instruction(emit);
5742    }
5743 }
5744 
5745 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)5746 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
5747 {
5748    if (emit->clip_mode == CLIP_DISTANCE) {
5749       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
5750       emit_clip_distance_instructions(emit);
5751 
5752    } else if (emit->clip_mode == CLIP_VERTEX) {
5753       /* Convert TGSI CLIPVERTEX to CLIPDIST */
5754       emit_clip_vertex_instructions(emit);
5755    }
5756 
5757    /**
5758     * Emit vertex position and take care of legacy user planes only if
5759     * there is a valid vertex position register index.
5760     * This is to take care of the case
5761     * where the shader doesn't output vertex position. Then in
5762     * this case, don't bother to emit more vertex instructions.
5763     */
5764    if (emit->vposition.out_index == INVALID_INDEX)
5765       return;
5766 
5767    /**
5768     * Emit per-vertex clipping instructions for legacy user defined clip planes.
5769     * NOTE: we must emit the clip distance instructions before the
5770     * emit_vpos_instructions() call since the later function will change
5771     * the TEMP[vs_pos_tmp_index] value.
5772     */
5773    if (emit->clip_mode == CLIP_LEGACY) {
5774       /* Emit CLIPDIST for legacy user defined clip planes */
5775       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
5776    }
5777 }
5778 
5779 
5780 /**
5781  * Emit extra per-vertex instructions.  This includes clip-coordinate
5782  * space conversion and computing clip distances.  This is called for
5783  * each GS emit-vertex instruction and at the end of VS translation.
5784  */
5785 static void
emit_vertex_instructions(struct svga_shader_emitter_v10 * emit)5786 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
5787 {
5788    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
5789 
5790    /* Emit clipping instructions based on clipping mode */
5791    emit_clipping_instructions(emit);
5792 
5793    /**
5794     * Reset the temporary vertex position register index
5795     * so that emit_dst_register() will use the real vertex position output
5796     */
5797    emit->vposition.tmp_index = INVALID_INDEX;
5798 
5799    /* Emit vertex position instructions */
5800    emit_vpos_instructions(emit, vs_pos_tmp_index);
5801 
5802    /* Restore original vposition.tmp_index value for the next GS vertex.
5803     * It doesn't matter for VS.
5804     */
5805    emit->vposition.tmp_index = vs_pos_tmp_index;
5806 }
5807 
5808 /**
5809  * Translate the TGSI_OPCODE_EMIT GS instruction.
5810  */
5811 static boolean
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)5812 emit_vertex(struct svga_shader_emitter_v10 *emit,
5813             const struct tgsi_full_instruction *inst)
5814 {
5815    unsigned ret = TRUE;
5816 
5817    assert(emit->unit == PIPE_SHADER_GEOMETRY);
5818 
5819    emit_vertex_instructions(emit);
5820 
5821    /* We can't use emit_simple() because the TGSI instruction has one
5822     * operand (vertex stream number) which we must ignore for VGPU10.
5823     */
5824    begin_emit_instruction(emit);
5825    emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
5826    end_emit_instruction(emit);
5827 
5828    return ret;
5829 }
5830 
5831 
5832 /**
5833  * Emit the extra code to convert from VGPU10's boolean front-face
5834  * register to TGSI's signed front-face register.
5835  *
5836  * TODO: Make temporary front-face register a scalar.
5837  */
5838 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)5839 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
5840 {
5841    assert(emit->unit == PIPE_SHADER_FRAGMENT);
5842 
5843    if (emit->fs.face_input_index != INVALID_INDEX) {
5844       /* convert vgpu10 boolean face register to gallium +/-1 value */
5845       struct tgsi_full_dst_register tmp_dst =
5846          make_dst_temp_reg(emit->fs.face_tmp_index);
5847       struct tgsi_full_src_register one =
5848          make_immediate_reg_float(emit, 1.0f);
5849       struct tgsi_full_src_register neg_one =
5850          make_immediate_reg_float(emit, -1.0f);
5851 
5852       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
5853       begin_emit_instruction(emit);
5854       emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
5855       emit_dst_register(emit, &tmp_dst);
5856       emit_face_register(emit);
5857       emit_src_register(emit, &one);
5858       emit_src_register(emit, &neg_one);
5859       end_emit_instruction(emit);
5860    }
5861 }
5862 
5863 
5864 /**
5865  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
5866  */
5867 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)5868 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
5869 {
5870    assert(emit->unit == PIPE_SHADER_FRAGMENT);
5871 
5872    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5873       struct tgsi_full_dst_register tmp_dst =
5874          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
5875       struct tgsi_full_dst_register tmp_dst_xyz =
5876          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
5877       struct tgsi_full_dst_register tmp_dst_w =
5878          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
5879       struct tgsi_full_src_register one =
5880          make_immediate_reg_float(emit, 1.0f);
5881       struct tgsi_full_src_register fragcoord =
5882          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
5883 
5884       /* save the input index */
5885       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
5886       /* set to invalid to prevent substitution in emit_src_register() */
5887       emit->fs.fragcoord_input_index = INVALID_INDEX;
5888 
5889       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
5890       begin_emit_instruction(emit);
5891       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5892       emit_dst_register(emit, &tmp_dst_xyz);
5893       emit_src_register(emit, &fragcoord);
5894       end_emit_instruction(emit);
5895 
5896       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
5897       begin_emit_instruction(emit);
5898       emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
5899       emit_dst_register(emit, &tmp_dst_w);
5900       emit_src_register(emit, &one);
5901       emit_src_register(emit, &fragcoord);
5902       end_emit_instruction(emit);
5903 
5904       /* restore saved value */
5905       emit->fs.fragcoord_input_index = fragcoord_input_index;
5906    }
5907 }
5908 
5909 
5910 /**
5911  * Emit extra instructions to adjust VS inputs/attributes.  This can
5912  * mean casting a vertex attribute from int to float or setting the
5913  * W component to 1, or both.
5914  */
5915 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)5916 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
5917 {
5918    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
5919    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
5920    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
5921    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
5922    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
5923    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
5924    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
5925 
5926    unsigned adjust_mask = (save_w_1_mask |
5927                            save_itof_mask |
5928                            save_utof_mask |
5929                            save_is_bgra_mask |
5930                            save_puint_to_snorm_mask |
5931                            save_puint_to_uscaled_mask |
5932                            save_puint_to_sscaled_mask);
5933 
5934    assert(emit->unit == PIPE_SHADER_VERTEX);
5935 
5936    if (adjust_mask) {
5937       struct tgsi_full_src_register one =
5938          make_immediate_reg_float(emit, 1.0f);
5939 
5940       struct tgsi_full_src_register one_int =
5941          make_immediate_reg_int(emit, 1);
5942 
5943       /* We need to turn off these bitmasks while emitting the
5944        * instructions below, then restore them afterward.
5945        */
5946       emit->key.vs.adjust_attrib_w_1 = 0;
5947       emit->key.vs.adjust_attrib_itof = 0;
5948       emit->key.vs.adjust_attrib_utof = 0;
5949       emit->key.vs.attrib_is_bgra = 0;
5950       emit->key.vs.attrib_puint_to_snorm = 0;
5951       emit->key.vs.attrib_puint_to_uscaled = 0;
5952       emit->key.vs.attrib_puint_to_sscaled = 0;
5953 
5954       while (adjust_mask) {
5955          unsigned index = u_bit_scan(&adjust_mask);
5956 
5957          /* skip the instruction if this vertex attribute is not being used */
5958          if (emit->info.input_usage_mask[index] == 0)
5959             continue;
5960 
5961          unsigned tmp = emit->vs.adjusted_input[index];
5962          struct tgsi_full_src_register input_src =
5963             make_src_reg(TGSI_FILE_INPUT, index);
5964 
5965          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
5966          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
5967          struct tgsi_full_dst_register tmp_dst_w =
5968             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
5969 
5970          /* ITOF/UTOF/MOV tmp, input[index] */
5971          if (save_itof_mask & (1 << index)) {
5972             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
5973                                  &tmp_dst, &input_src, FALSE);
5974          }
5975          else if (save_utof_mask & (1 << index)) {
5976             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
5977                                  &tmp_dst, &input_src, FALSE);
5978          }
5979          else if (save_puint_to_snorm_mask & (1 << index)) {
5980             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
5981          }
5982          else if (save_puint_to_uscaled_mask & (1 << index)) {
5983             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
5984          }
5985          else if (save_puint_to_sscaled_mask & (1 << index)) {
5986             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
5987          }
5988          else {
5989             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
5990             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
5991                                  &tmp_dst, &input_src, FALSE);
5992          }
5993 
5994          if (save_is_bgra_mask & (1 << index)) {
5995             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
5996          }
5997 
5998          if (save_w_1_mask & (1 << index)) {
5999             /* MOV tmp.w, 1.0 */
6000             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
6001                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6002                                     &tmp_dst_w, &one_int, FALSE);
6003             }
6004             else {
6005                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6006                                     &tmp_dst_w, &one, FALSE);
6007             }
6008          }
6009       }
6010 
6011       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
6012       emit->key.vs.adjust_attrib_itof = save_itof_mask;
6013       emit->key.vs.adjust_attrib_utof = save_utof_mask;
6014       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
6015       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
6016       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
6017       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
6018    }
6019 }
6020 
6021 
6022 /**
6023  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
6024  * to implement some instructions.  We pre-allocate those values here
6025  * in the immediate constant buffer.
6026  */
6027 static void
alloc_common_immediates(struct svga_shader_emitter_v10 * emit)6028 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
6029 {
6030    unsigned n = 0;
6031 
6032    emit->common_immediate_pos[n++] =
6033       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
6034 
6035    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
6036       emit->common_immediate_pos[n++] =
6037          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
6038    }
6039 
6040    emit->common_immediate_pos[n++] =
6041       alloc_immediate_int4(emit, 0, 1, 0, -1);
6042 
6043    if (emit->key.vs.attrib_puint_to_snorm) {
6044       emit->common_immediate_pos[n++] =
6045          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
6046    }
6047 
6048    if (emit->key.vs.attrib_puint_to_uscaled) {
6049       emit->common_immediate_pos[n++] =
6050          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
6051    }
6052 
6053    if (emit->key.vs.attrib_puint_to_sscaled) {
6054       emit->common_immediate_pos[n++] =
6055          alloc_immediate_int4(emit, 22, 12, 2, 0);
6056 
6057       emit->common_immediate_pos[n++] =
6058          alloc_immediate_int4(emit, 22, 30, 0, 0);
6059    }
6060 
6061    unsigned i;
6062 
6063    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
6064       if (emit->key.tex[i].texel_bias) {
6065          /* Replace 0.0f if more immediate float value is needed */
6066          emit->common_immediate_pos[n++] =
6067             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
6068          break;
6069       }
6070    }
6071 
6072    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
6073    emit->num_common_immediates = n;
6074 }
6075 
6076 
6077 /**
6078  * Emit any extra/helper declarations/code that we might need between
6079  * the declaration section and code section.
6080  */
6081 static boolean
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)6082 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
6083 {
6084    /* Properties */
6085    if (emit->unit == PIPE_SHADER_GEOMETRY)
6086       emit_property_instructions(emit);
6087 
6088    /* Declare inputs */
6089    if (!emit_input_declarations(emit))
6090       return FALSE;
6091 
6092    /* Declare outputs */
6093    if (!emit_output_declarations(emit))
6094       return FALSE;
6095 
6096    /* Declare temporary registers */
6097    emit_temporaries_declaration(emit);
6098 
6099    /* Declare constant registers */
6100    emit_constant_declaration(emit);
6101 
6102    /* Declare samplers and resources */
6103    emit_sampler_declarations(emit);
6104    emit_resource_declarations(emit);
6105 
6106    /* Declare clip distance output registers */
6107    if (emit->unit == PIPE_SHADER_VERTEX ||
6108        emit->unit == PIPE_SHADER_GEOMETRY) {
6109       emit_clip_distance_declarations(emit);
6110    }
6111 
6112    alloc_common_immediates(emit);
6113 
6114    if (emit->unit == PIPE_SHADER_FRAGMENT &&
6115        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
6116       float alpha = emit->key.fs.alpha_ref;
6117       emit->fs.alpha_ref_index =
6118          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
6119    }
6120 
6121    /* Now, emit the constant block containing all the immediates
6122     * declared by shader, as well as the extra ones seen above.
6123     */
6124    emit_vgpu10_immediates_block(emit);
6125 
6126    if (emit->unit == PIPE_SHADER_FRAGMENT) {
6127       emit_frontface_instructions(emit);
6128       emit_fragcoord_instructions(emit);
6129    }
6130    else if (emit->unit == PIPE_SHADER_VERTEX) {
6131       emit_vertex_attrib_instructions(emit);
6132    }
6133 
6134    return TRUE;
6135 }
6136 
6137 
6138 /**
6139  * The device has no direct support for the pipe_blend_state::alpha_to_one
6140  * option so we implement it here with shader code.
6141  *
6142  * Note that this is kind of pointless, actually.  Here we're clobbering
6143  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
6144  * up with 100% coverage.  That's almost certainly not what the user wants.
6145  * The work-around is to add extra shader code to compute coverage from alpha
6146  * and write it to the coverage output register (if the user's shader doesn't
6147  * do so already).  We'll probably do that in the future.
6148  */
6149 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)6150 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
6151                                unsigned fs_color_tmp_index)
6152 {
6153    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6154    unsigned i;
6155 
6156    /* Note: it's not 100% clear from the spec if we're supposed to clobber
6157     * the alpha for all render targets.  But that's what NVIDIA does and
6158     * that's what Piglit tests.
6159     */
6160    for (i = 0; i < emit->fs.num_color_outputs; i++) {
6161       struct tgsi_full_dst_register color_dst;
6162 
6163       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
6164          /* write to the temp color register */
6165          color_dst = make_dst_temp_reg(fs_color_tmp_index);
6166       }
6167       else {
6168          /* write directly to the color[i] output */
6169          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
6170       }
6171 
6172       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
6173 
6174       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
6175    }
6176 }
6177 
6178 
6179 /**
6180  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
6181  * against the alpha reference value and discards the fragment if the
6182  * comparison fails.
6183  */
6184 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)6185 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
6186                              unsigned fs_color_tmp_index)
6187 {
6188    /* compare output color's alpha to alpha ref and kill */
6189    unsigned tmp = get_temp_index(emit);
6190    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6191    struct tgsi_full_src_register tmp_src_x =
6192       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6193    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6194    struct tgsi_full_src_register color_src =
6195       make_src_temp_reg(fs_color_tmp_index);
6196    struct tgsi_full_src_register color_src_w =
6197       scalar_src(&color_src, TGSI_SWIZZLE_W);
6198    struct tgsi_full_src_register ref_src =
6199       make_src_immediate_reg(emit->fs.alpha_ref_index);
6200    struct tgsi_full_dst_register color_dst =
6201       make_dst_output_reg(emit->fs.color_out_index[0]);
6202 
6203    assert(emit->unit == PIPE_SHADER_FRAGMENT);
6204 
6205    /* dst = src0 'alpha_func' src1 */
6206    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
6207                    &color_src_w, &ref_src);
6208 
6209    /* DISCARD if dst.x == 0 */
6210    begin_emit_instruction(emit);
6211    emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
6212    emit_src_register(emit, &tmp_src_x);
6213    end_emit_instruction(emit);
6214 
6215    /* If we don't need to broadcast the color below, emit the final color here.
6216     */
6217    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
6218       /* MOV output.color, tempcolor */
6219       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
6220                            &color_src, FALSE);     /* XXX saturate? */
6221    }
6222 
6223    free_temp_indexes(emit);
6224 }
6225 
6226 
6227 /**
6228  * Emit instructions for writing a single color output to multiple
6229  * color buffers.
6230  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
6231  * when key.fs.white_fragments is true).
6232  * property is set and the number of render targets is greater than one.
6233  * \param fs_color_tmp_index  index of the temp register that holds the
6234  *                            color to broadcast.
6235  */
6236 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)6237 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
6238                                  unsigned fs_color_tmp_index)
6239 {
6240    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
6241    unsigned i;
6242    struct tgsi_full_src_register color_src;
6243 
6244    if (emit->key.fs.white_fragments) {
6245       /* set all color outputs to white */
6246       color_src = make_immediate_reg_float(emit, 1.0f);
6247    }
6248    else {
6249       /* set all color outputs to TEMP[fs_color_tmp_index] */
6250       assert(fs_color_tmp_index != INVALID_INDEX);
6251       color_src = make_src_temp_reg(fs_color_tmp_index);
6252    }
6253 
6254    assert(emit->unit == PIPE_SHADER_FRAGMENT);
6255 
6256    for (i = 0; i < n; i++) {
6257       unsigned output_reg = emit->fs.color_out_index[i];
6258       struct tgsi_full_dst_register color_dst =
6259          make_dst_output_reg(output_reg);
6260 
6261       /* Fill in this semantic here since we'll use it later in
6262        * emit_dst_register().
6263        */
6264       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
6265 
6266       /* MOV output.color[i], tempcolor */
6267       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
6268                            &color_src, FALSE);     /* XXX saturate? */
6269    }
6270 }
6271 
6272 
6273 /**
6274  * Emit extra helper code after the original shader code, but before the
6275  * last END/RET instruction.
6276  * For vertex shaders this means emitting the extra code to apply the
6277  * prescale scale/translation.
6278  */
6279 static boolean
emit_post_helpers(struct svga_shader_emitter_v10 * emit)6280 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
6281 {
6282    if (emit->unit == PIPE_SHADER_VERTEX) {
6283       emit_vertex_instructions(emit);
6284    }
6285    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
6286       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
6287 
6288       assert(!(emit->key.fs.white_fragments &&
6289                emit->key.fs.write_color0_to_n_cbufs == 0));
6290 
6291       /* We no longer want emit_dst_register() to substitute the
6292        * temporary fragment color register for the real color output.
6293        */
6294       emit->fs.color_tmp_index = INVALID_INDEX;
6295 
6296       if (emit->key.fs.alpha_to_one) {
6297          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
6298       }
6299       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
6300          emit_alpha_test_instructions(emit, fs_color_tmp_index);
6301       }
6302       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
6303           emit->key.fs.white_fragments) {
6304          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
6305       }
6306    }
6307 
6308    return TRUE;
6309 }
6310 
6311 
6312 /**
6313  * Translate the TGSI tokens into VGPU10 tokens.
6314  */
6315 static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)6316 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
6317                          const struct tgsi_token *tokens)
6318 {
6319    struct tgsi_parse_context parse;
6320    boolean ret = TRUE;
6321    boolean pre_helpers_emitted = FALSE;
6322    unsigned inst_number = 0;
6323 
6324    tgsi_parse_init(&parse, tokens);
6325 
6326    while (!tgsi_parse_end_of_tokens(&parse)) {
6327       tgsi_parse_token(&parse);
6328 
6329       switch (parse.FullToken.Token.Type) {
6330       case TGSI_TOKEN_TYPE_IMMEDIATE:
6331          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
6332          if (!ret)
6333             goto done;
6334          break;
6335 
6336       case TGSI_TOKEN_TYPE_DECLARATION:
6337          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
6338          if (!ret)
6339             goto done;
6340          break;
6341 
6342       case TGSI_TOKEN_TYPE_INSTRUCTION:
6343          if (!pre_helpers_emitted) {
6344             ret = emit_pre_helpers(emit);
6345             if (!ret)
6346                goto done;
6347             pre_helpers_emitted = TRUE;
6348          }
6349          ret = emit_vgpu10_instruction(emit, inst_number++,
6350                                        &parse.FullToken.FullInstruction);
6351          if (!ret)
6352             goto done;
6353          break;
6354 
6355       case TGSI_TOKEN_TYPE_PROPERTY:
6356          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
6357          if (!ret)
6358             goto done;
6359          break;
6360 
6361       default:
6362          break;
6363       }
6364    }
6365 
6366 done:
6367    tgsi_parse_free(&parse);
6368    return ret;
6369 }
6370 
6371 
6372 /**
6373  * Emit the first VGPU10 shader tokens.
6374  */
6375 static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 * emit)6376 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
6377 {
6378    VGPU10ProgramToken ptoken;
6379 
6380    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
6381    ptoken.majorVersion = 4;
6382    ptoken.minorVersion = 0;
6383    ptoken.programType = translate_shader_type(emit->unit);
6384    if (!emit_dword(emit, ptoken.value))
6385       return FALSE;
6386 
6387    /* Second token: total length of shader, in tokens.  We can't fill this
6388     * in until we're all done.  Emit zero for now.
6389     */
6390    return emit_dword(emit, 0);
6391 }
6392 
6393 
6394 static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)6395 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
6396 {
6397    VGPU10ProgramToken *tokens;
6398 
6399    /* Replace the second token with total shader length */
6400    tokens = (VGPU10ProgramToken *) emit->buf;
6401    tokens[1].value = emit_get_num_tokens(emit);
6402 
6403    return TRUE;
6404 }
6405 
6406 
/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 *
 * \param tokens  the original fragment shader
 * \return the tokens produced by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *twoside_tokens;

   /* debug aid, disabled */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }

   twoside_tokens = tgsi_add_two_side(tokens);

   /* debug aid, disabled */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(twoside_tokens, 0);
   }

   return twoside_tokens;
}
6425 
6426 
6427 /**
6428  * Modify the FS to do polygon stipple.
6429  */
6430 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)6431 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
6432                       const struct tgsi_token *tokens)
6433 {
6434    const struct tgsi_token *new_tokens;
6435    unsigned unit;
6436 
6437    if (0) {
6438       debug_printf("Before pstipple ------------------\n");
6439       tgsi_dump(tokens,0);
6440    }
6441 
6442    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
6443                                                      TGSI_FILE_INPUT);
6444 
6445    emit->fs.pstipple_sampler_unit = unit;
6446 
6447    /* Setup texture state for stipple */
6448    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
6449    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
6450    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
6451    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
6452    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
6453 
6454    if (0) {
6455       debug_printf("After pstipple ------------------\n");
6456       tgsi_dump(new_tokens, 0);
6457    }
6458 
6459    return new_tokens;
6460 }
6461 
/**
 * Modify the FS to support anti-aliasing point.
 *
 * \param tokens          the original fragment shader
 * \param aa_coord_index  passed through to tgsi_add_aa_point()
 * \return the tokens produced by tgsi_add_aa_point()
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   const struct tgsi_token *aa_tokens;

   /* debug aid, disabled */
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }

   aa_tokens = tgsi_add_aa_point(tokens, aa_coord_index);

   /* debug aid, disabled */
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(aa_tokens, 0);
   }

   return aa_tokens;
}
6480 
6481 /**
6482  * This is the main entrypoint for the TGSI -> VPGU10 translator.
6483  */
6484 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,unsigned unit)6485 svga_tgsi_vgpu10_translate(struct svga_context *svga,
6486                            const struct svga_shader *shader,
6487                            const struct svga_compile_key *key,
6488                            unsigned unit)
6489 {
6490    struct svga_shader_variant *variant = NULL;
6491    struct svga_shader_emitter_v10 *emit;
6492    const struct tgsi_token *tokens = shader->tokens;
6493    struct svga_vertex_shader *vs = svga->curr.vs;
6494    struct svga_geometry_shader *gs = svga->curr.gs;
6495 
6496    assert(unit == PIPE_SHADER_VERTEX ||
6497           unit == PIPE_SHADER_GEOMETRY ||
6498           unit == PIPE_SHADER_FRAGMENT);
6499 
6500    /* These two flags cannot be used together */
6501    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
6502 
6503    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
6504    /*
6505     * Setup the code emitter
6506     */
6507    emit = alloc_emitter();
6508    if (!emit)
6509       goto done;
6510 
6511    emit->unit = unit;
6512    emit->key = *key;
6513 
6514    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
6515                                    emit->key.gs.need_prescale);
6516    emit->vposition.tmp_index = INVALID_INDEX;
6517    emit->vposition.so_index = INVALID_INDEX;
6518    emit->vposition.out_index = INVALID_INDEX;
6519 
6520    emit->fs.color_tmp_index = INVALID_INDEX;
6521    emit->fs.face_input_index = INVALID_INDEX;
6522    emit->fs.fragcoord_input_index = INVALID_INDEX;
6523 
6524    emit->gs.prim_id_index = INVALID_INDEX;
6525 
6526    emit->clip_dist_out_index = INVALID_INDEX;
6527    emit->clip_dist_tmp_index = INVALID_INDEX;
6528    emit->clip_dist_so_index = INVALID_INDEX;
6529    emit->clip_vertex_out_index = INVALID_INDEX;
6530 
6531    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
6532       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
6533    }
6534 
6535    if (unit == PIPE_SHADER_FRAGMENT) {
6536       if (key->fs.light_twoside) {
6537          tokens = transform_fs_twoside(tokens);
6538       }
6539       if (key->fs.pstipple) {
6540          const struct tgsi_token *new_tokens =
6541             transform_fs_pstipple(emit, tokens);
6542          if (tokens != shader->tokens) {
6543             /* free the two-sided shader tokens */
6544             tgsi_free_tokens(tokens);
6545          }
6546          tokens = new_tokens;
6547       }
6548       if (key->fs.aa_point) {
6549          tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
6550       }
6551    }
6552 
6553    if (SVGA_DEBUG & DEBUG_TGSI) {
6554       debug_printf("#####################################\n");
6555       debug_printf("### TGSI Shader %u\n", shader->id);
6556       tgsi_dump(tokens, 0);
6557    }
6558 
6559    /**
6560     * Rescan the header if the token string is different from the one
6561     * included in the shader; otherwise, the header info is already up-to-date
6562     */
6563    if (tokens != shader->tokens) {
6564       tgsi_scan_shader(tokens, &emit->info);
6565    } else {
6566       emit->info = shader->info;
6567    }
6568 
6569    emit->num_outputs = emit->info.num_outputs;
6570 
6571    if (unit == PIPE_SHADER_FRAGMENT) {
6572       /* Compute FS input remapping to match the output from VS/GS */
6573       if (gs) {
6574          svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
6575       } else {
6576          assert(vs);
6577          svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
6578       }
6579    } else if (unit == PIPE_SHADER_GEOMETRY) {
6580       assert(vs);
6581       svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
6582    }
6583 
6584    determine_clipping_mode(emit);
6585 
6586    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
6587       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
6588          /* if there is stream output declarations associated
6589           * with this shader or the shader writes to ClipDistance
6590           * then reserve extra registers for the non-adjusted vertex position
6591           * and the ClipDistance shadow copy
6592           */
6593          emit->vposition.so_index = emit->num_outputs++;
6594 
6595          if (emit->clip_mode == CLIP_DISTANCE) {
6596             emit->clip_dist_so_index = emit->num_outputs++;
6597             if (emit->info.num_written_clipdistance > 4)
6598                emit->num_outputs++;
6599          }
6600       }
6601    }
6602 
6603    /*
6604     * Do actual shader translation.
6605     */
6606    if (!emit_vgpu10_header(emit)) {
6607       debug_printf("svga: emit VGPU10 header failed\n");
6608       goto cleanup;
6609    }
6610 
6611    if (!emit_vgpu10_instructions(emit, tokens)) {
6612       debug_printf("svga: emit VGPU10 instructions failed\n");
6613       goto cleanup;
6614    }
6615 
6616    if (!emit_vgpu10_tail(emit)) {
6617       debug_printf("svga: emit VGPU10 tail failed\n");
6618       goto cleanup;
6619    }
6620 
6621    if (emit->register_overflow) {
6622       goto cleanup;
6623    }
6624 
6625    /*
6626     * Create, initialize the 'variant' object.
6627     */
6628    variant = svga_new_shader_variant(svga);
6629    if (!variant)
6630       goto cleanup;
6631 
6632    variant->shader = shader;
6633    variant->nr_tokens = emit_get_num_tokens(emit);
6634    variant->tokens = (const unsigned *)emit->buf;
6635    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
6636    memcpy(&variant->key, key, sizeof(*key));
6637    variant->id = UTIL_BITMASK_INVALID_INDEX;
6638 
6639    /* The extra constant starting offset starts with the number of
6640     * shader constants declared in the shader.
6641     */
6642    variant->extra_const_start = emit->num_shader_consts[0];
6643    if (key->gs.wide_point) {
6644       /**
6645        * The extra constant added in the transformed shader
6646        * for inverse viewport scale is to be supplied by the driver.
6647        * So the extra constant starting offset needs to be reduced by 1.
6648        */
6649       assert(variant->extra_const_start > 0);
6650       variant->extra_const_start--;
6651    }
6652 
6653    variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
6654 
6655    /* If there was exactly one write to a fragment shader output register
6656     * and it came from a constant buffer, we know all fragments will have
6657     * the same color (except for blending).
6658     */
6659    variant->constant_color_output =
6660       emit->constant_color_output && emit->num_output_writes == 1;
6661 
6662    /** keep track in the variant if flat interpolation is used
6663     *  for any of the varyings.
6664     */
6665    variant->uses_flat_interp = emit->uses_flat_interp;
6666 
6667    variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
6668 
6669    if (tokens != shader->tokens) {
6670       tgsi_free_tokens(tokens);
6671    }
6672 
6673 cleanup:
6674    free_emitter(emit);
6675 
6676 done:
6677    SVGA_STATS_TIME_POP(svga_sws(svga));
6678    return variant;
6679 }
6680