1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file brw_vec4_tes.cpp
26  *
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
28  */
29 
30 #include "brw_vec4_tes.h"
31 #include "brw_cfg.h"
32 #include "common/gen_debug.h"
33 
34 namespace brw {
35 
vec4_tes_visitor(const struct brw_compiler * compiler,void * log_data,const struct brw_tes_prog_key * key,struct brw_tes_prog_data * prog_data,const nir_shader * shader,void * mem_ctx,int shader_time_index)36 vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
37                                   void *log_data,
38                                   const struct brw_tes_prog_key *key,
39                                   struct brw_tes_prog_data *prog_data,
40                                   const nir_shader *shader,
41                                   void *mem_ctx,
42                                   int shader_time_index)
43    : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
44                   shader, mem_ctx, false, shader_time_index)
45 {
46 }
47 
48 void
setup_payload()49 vec4_tes_visitor::setup_payload()
50 {
51    int reg = 0;
52 
53    /* The payload always contains important data in r0 and r1, which contains
54     * the URB handles that are passed on to the URB write at the end
55     * of the thread.
56     */
57    reg += 2;
58 
59    reg = setup_uniforms(reg);
60 
61    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
62       for (int i = 0; i < 3; i++) {
63          if (inst->src[i].file != ATTR)
64             continue;
65 
66          bool is_64bit = type_sz(inst->src[i].type) == 8;
67 
68          unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
69          struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
70          grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
71          grf.swizzle = inst->src[i].swizzle;
72          grf.type = inst->src[i].type;
73          grf.abs = inst->src[i].abs;
74          grf.negate = inst->src[i].negate;
75 
76          /* For 64-bit attributes we can end up with components XY in the
77           * second half of a register and components ZW in the first half
78           * of the next. Fix it up here.
79           */
80          if (is_64bit && grf.subnr > 0) {
81             /* We can't do swizzles that mix XY and ZW channels in this case.
82              * Such cases should have been handled by the scalarization pass.
83              */
84             assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
85                    (brw_mask_for_swizzle(grf.swizzle) & 0xc));
86             if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
87                grf.subnr = 0;
88                grf.nr++;
89                grf.swizzle -= BRW_SWIZZLE_ZZZZ;
90             }
91          }
92 
93          inst->src[i] = grf;
94       }
95    }
96 
97    reg += 8 * prog_data->urb_read_length;
98 
99    this->first_non_payload_grf = reg;
100 }
101 
102 
103 void
emit_prolog()104 vec4_tes_visitor::emit_prolog()
105 {
106    input_read_header = src_reg(this, glsl_type::uvec4_type);
107    emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
108 
109    this->current_annotation = NULL;
110 }
111 
112 
113 void
emit_urb_write_header(int mrf)114 vec4_tes_visitor::emit_urb_write_header(int mrf)
115 {
116    /* No need to do anything for DS; an implied write to this MRF will be
117     * performed by VS_OPCODE_URB_WRITE.
118     */
119    (void) mrf;
120 }
121 
122 
123 vec4_instruction *
emit_urb_write_opcode(bool complete)124 vec4_tes_visitor::emit_urb_write_opcode(bool complete)
125 {
126    /* For DS, the URB writes end the thread. */
127    if (complete) {
128       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
129          emit_shader_time_end();
130    }
131 
132    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
133    inst->urb_write_flags = complete ?
134       BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
135 
136    return inst;
137 }
138 
/**
 * Translate a NIR intrinsic into vec4 IR for the tessellation evaluation
 * stage.
 *
 * Handles the tessellation coordinate, the outer/inner tessellation levels,
 * the primitive ID and (per-vertex) input loads.  Any other intrinsic is
 * delegated to vec4_visitor::nir_emit_intrinsic().
 *
 * Input loads take one of two routes:
 *  - a direct (non-indirect) load whose slot offset is below the push limit
 *    is read straight from the ATTR file, and prog_data->urb_read_length is
 *    grown to cover it;
 *  - every other load is fetched with one or two VEC4_OPCODE_URB_READ
 *    messages and then copied to the destination.
 *
 * \param instr  the NIR intrinsic instruction to translate.
 */
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   /* View the visitor's program data as the TES-specific structure; it is
    * only read here, to look up the tessellation domain.
    */
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      /* The outer levels are read from attribute slot 1; which channels are
       * picked depends on the tessellation domain.
       */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      /* Quads read the inner levels from attribute slot 0 (channels in
       * reverse order); every other domain reads a single float from
       * slot 1.
       */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      /* Constant slot offset of the load, taken from the intrinsic. */
      unsigned imm_offset = instr->const_index[0];
      /* Start from the header built in emit_prolog(); it is replaced below
       * when an indirect offset has to be added.
       */
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      /* For 64-bit destinations the component index is halved.
       * NOTE(review): presumably NIR counts it in 32-bit units -- confirm.
       */
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         /* Indirect access: build a fresh header that adds the indirect
          * offset to the base one, then fall through to the URB read path.
          */
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            /* Grow the URB read length so that it covers this slot (two
             * slots for 64-bit data), counted in units of two slots.
             */
            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            /* Pushed input handled: leave the switch and skip the URB read
             * path below.
             */
            break;
         }
      }

      /* Reached for indirect loads and for direct loads at or beyond the
       * push limit: fetch the data with explicit URB read messages.
       */
      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         /* The URB reads write the temporary through a 32-bit integer view. */
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            /* Second message: the next slot, written one register further
             * into the temporary.
             */
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Rearrange the raw data into a second temporary before the final
          * copy to the destination.
          */
         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      /* Not TES-specific: let the generic vec4 visitor handle it. */
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
264 
265 
266 void
emit_thread_end()267 vec4_tes_visitor::emit_thread_end()
268 {
269    /* For DS, we always end the thread by emitting a single vertex.
270     * emit_urb_write_opcode() will take care of setting the eot flag on the
271     * SEND instruction.
272     */
273    emit_vertex();
274 }
275 
276 } /* namespace brw */
277