/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"
#include "brw_ir_performance.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           const brw::performance &perf,
                           struct brw_compile_stats *stats);
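
/*
 * Rough usage sketch: a VUE-stage compile path builds a vec4 visitor, runs
 * it, and then hands its CFG to this generator.  This is a non-authoritative
 * sketch; "v", "compiler", "log_data", "mem_ctx", "prog_data" and "stats"
 * are assumed to exist in the caller:
 *
 *    const unsigned *assembly =
 *       brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
 *                                  prog_data, v.cfg,
 *                                  v.performance_analysis.require(),
 *                                  stats);
 */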

#ifdef __cplusplus
} /* extern "C" */

namespace brw {
/**
 * The vertex shader front-end.
 *
 * Translates the shader's NIR (coming from GLSL, ARB_vertex_program, or
 * fixed-function state) into VS IR.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index);

   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   char *fail_msg;
   bool failed;

   /**
    * Source IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned int max_grf;
   brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
   brw_analysis<brw::performance, vec4_visitor> performance_analysis;

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->locations used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;

   bool run();
   void fail(const char *msg, ...);
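   /* Callers construct the visitor, invoke run(), and on failure report
    * fail_msg.  A minimal sketch (the instance name "v" is illustrative):
    *
    *    if (!v.run()) {
    *       // compilation failed; v.fail_msg says why
    *    }
    */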

   int setup_uniforms(int payload_reg);

   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(unsigned spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   virtual void invalidate_analysis(brw::analysis_dependency_class c);
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();
   void fixup_3src_null_dest();

   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
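   /* Each EMITn(op) use below declares a convenience emitter that takes a
    * destination and n sources; e.g. EMIT2(ADD) expands to:
    *
    *    vec4_instruction *ADD(const dst_reg &, const src_reg &, const src_reg &);
    */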
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);
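
   /* A sketch of clamping a value to be non-negative with emit_minmax(),
    * which is expected to emit a SEL with the given conditional mod
    * (BRW_CONDITIONAL_GE selects the max, BRW_CONDITIONAL_L the min);
    * "dst" and "src" are illustrative:
    *
    *    emit_minmax(BRW_CONDITIONAL_GE, dst, src, brw_imm_f(0.0f));
    */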

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);
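   /* Typically used to make a dynamically indexed descriptor (e.g. a surface
    * or sampler index) usable in a message header.  A sketch, assuming a NIR
    * intrinsic "instr" whose first source carries the index:
    *
    *    src_reg surface = emit_uniformize(get_nir_src(instr->src[0]));
    */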

   /** Fix all float operands of a 3-source instruction. */
   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);

   src_reg fix_3src_operand(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     const glsl_type *dest_type,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparator,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     uint32_t surface, src_reg surface_reg,
                     src_reg sampler_reg);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset,
                                src_reg indirect);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   void dump_instruction(const backend_instruction *inst) const;
   void dump_instruction(const backend_instruction *inst, FILE *file) const;

   bool is_high_sampler(src_reg sampler);

   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src);
   void emit_conversion_to_double(dst_reg dst, src_reg src);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);

   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_nir_src_imm(const nir_src &src);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);
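
   /* A rough sketch of how the NIR ALU path uses these helpers (heavily
    * simplified from nir_emit_alu(); swizzle and saturate handling are
    * omitted, and "instr" is the nir_alu_instr being translated):
    *
    *    nir_alu_type dst_type = (nir_alu_type)
    *       (nir_op_infos[instr->op].output_type |
    *        nir_dest_bit_size(instr->dest.dest));
    *    dst_reg dst = get_nir_dest(instr->dest.dest, dst_type);
    *
    *    nir_alu_type src_type = (nir_alu_type)
    *       (nir_op_infos[instr->op].input_types[0] |
    *        nir_src_bit_size(instr->src[0].src));
    *    src_reg op0 = get_nir_src(instr->src[0].src, src_type, 4);
    *
    *    emit(MOV(dst, op0));   // e.g. for a simple copy-like opcode
    */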

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;

protected:
   void emit_vertex();
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */