/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */
27 
28 #pragma once
29 
30 #include "brw_shader.h"
31 
32 extern "C" {
33 
34 #include <sys/types.h>
35 
36 #include "main/macros.h"
37 #include "main/shaderobj.h"
38 #include "main/uniforms.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_print.h"
41 #include "program/prog_optimize.h"
42 #include "program/register_allocate.h"
43 #include "program/sampler.h"
44 #include "program/hash_table.h"
45 #include "brw_context.h"
46 #include "brw_eu.h"
47 #include "brw_wm.h"
48 }
49 #include "glsl/glsl_types.h"
50 #include "glsl/ir.h"
51 
/* Forward declarations.  This header only uses these types through
 * pointers (fs_bblock * and acp_entry * in fs_visitor), so the full
 * definitions are not required here.
 *
 * acp_entry is declared inside an unnamed namespace, which gives it
 * internal linkage: every translation unit that includes this header sees
 * its own distinct acp_entry type.
 */
class fs_bblock;
namespace {
   class acp_entry;
}
56 
/**
 * Register file an fs_reg refers to (stored in fs_reg::file).
 *
 * The enumerators carry no explicit values, so they are numbered
 * consecutively starting from BAD_FILE == 0.
 */
enum register_file {
   BAD_FILE, /* NOTE(review): presumably "no valid file"; confirm in fs_reg::init() */
   ARF,
   GRF,
   MRF,
   IMM, /* value is held in fs_reg::imm */
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[reg] */
};
66 
67 class fs_reg {
68 public:
69    /* Callers of this ralloc-based new need not call delete. It's
70     * easier to just ralloc_free 'ctx' (or any of its ancestors). */
new(size_t size,void * ctx)71    static void* operator new(size_t size, void *ctx)
72    {
73       void *node;
74 
75       node = ralloc_size(ctx, size);
76       assert(node != NULL);
77 
78       return node;
79    }
80 
81    void init();
82 
83    fs_reg();
84    fs_reg(float f);
85    fs_reg(int32_t i);
86    fs_reg(uint32_t u);
87    fs_reg(struct brw_reg fixed_hw_reg);
88    fs_reg(enum register_file file, int reg);
89    fs_reg(enum register_file file, int reg, uint32_t type);
90    fs_reg(class fs_visitor *v, const struct glsl_type *type);
91 
92    bool equals(const fs_reg &r) const;
93 
94    /** Register file: ARF, GRF, MRF, IMM. */
95    enum register_file file;
96    /**
97     * Register number.  For ARF/MRF, it's the hardware register.  For
98     * GRF, it's a virtual register number until register allocation
99     */
100    int reg;
101    /**
102     * For virtual registers, this is a hardware register offset from
103     * the start of the register block (for example, a constant index
104     * in an array access).
105     */
106    int reg_offset;
107    /** Register type.  BRW_REGISTER_TYPE_* */
108    int type;
109    bool negate;
110    bool abs;
111    bool sechalf;
112    struct brw_reg fixed_hw_reg;
113    int smear; /* -1, or a channel of the reg to smear to all channels. */
114 
115    /** Value for file == IMM */
116    union {
117       int32_t i;
118       uint32_t u;
119       float f;
120    } imm;
121 };
122 
123 static const fs_reg reg_undef;
124 static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
125 static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
126 
127 class fs_inst : public exec_node {
128 public:
129    /* Callers of this ralloc-based new need not call delete. It's
130     * easier to just ralloc_free 'ctx' (or any of its ancestors). */
new(size_t size,void * ctx)131    static void* operator new(size_t size, void *ctx)
132    {
133       void *node;
134 
135       node = rzalloc_size(ctx, size);
136       assert(node != NULL);
137 
138       return node;
139    }
140 
141    void init();
142 
143    fs_inst();
144    fs_inst(enum opcode opcode);
145    fs_inst(enum opcode opcode, fs_reg dst);
146    fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0);
147    fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1);
148    fs_inst(enum opcode opcode, fs_reg dst,
149            fs_reg src0, fs_reg src1,fs_reg src2);
150 
151    bool equals(fs_inst *inst);
152    int regs_written();
153    bool overwrites_reg(const fs_reg &reg);
154    bool is_tex();
155    bool is_math();
156 
157    enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
158    fs_reg dst;
159    fs_reg src[3];
160    bool saturate;
161    bool predicated;
162    bool predicate_inverse;
163    int conditional_mod; /**< BRW_CONDITIONAL_* */
164 
165    int mlen; /**< SEND message length */
166    int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
167    uint32_t texture_offset; /**< Texture offset bitfield */
168    int sampler;
169    int target; /**< MRT target. */
170    bool eot;
171    bool header_present;
172    bool shadow_compare;
173    bool force_uncompressed;
174    bool force_sechalf;
175    uint32_t offset; /* spill/unspill offset */
176 
177    /** @{
178     * Annotation for the generated IR.  One of the two can be set.
179     */
180    ir_instruction *ir;
181    const char *annotation;
182    /** @} */
183 };
184 
185 class fs_visitor : public ir_visitor
186 {
187 public:
188 
189    fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
190               struct brw_shader *shader);
191    ~fs_visitor();
192 
193    fs_reg *variable_storage(ir_variable *var);
194    int virtual_grf_alloc(int size);
195    void import_uniforms(fs_visitor *v);
196 
197    void visit(ir_variable *ir);
198    void visit(ir_assignment *ir);
199    void visit(ir_dereference_variable *ir);
200    void visit(ir_dereference_record *ir);
201    void visit(ir_dereference_array *ir);
202    void visit(ir_expression *ir);
203    void visit(ir_texture *ir);
204    void visit(ir_if *ir);
205    void visit(ir_constant *ir);
206    void visit(ir_swizzle *ir);
207    void visit(ir_return *ir);
208    void visit(ir_loop *ir);
209    void visit(ir_loop_jump *ir);
210    void visit(ir_discard *ir);
211    void visit(ir_call *ir);
212    void visit(ir_function *ir);
213    void visit(ir_function_signature *ir);
214 
215    void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);
216 
217    fs_inst *emit(fs_inst inst);
218 
219    fs_inst *emit(enum opcode opcode);
220    fs_inst *emit(enum opcode opcode, fs_reg dst);
221    fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0);
222    fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1);
223    fs_inst *emit(enum opcode opcode, fs_reg dst,
224                  fs_reg src0, fs_reg src1, fs_reg src2);
225 
226    int type_size(const struct glsl_type *type);
227    fs_inst *get_instruction_generating_reg(fs_inst *start,
228 					   fs_inst *end,
229 					   fs_reg reg);
230 
231    bool run();
232    void setup_paramvalues_refs();
233    void assign_curb_setup();
234    void calculate_urb_setup();
235    void assign_urb_setup();
236    bool assign_regs();
237    void assign_regs_trivial();
238    int choose_spill_reg(struct ra_graph *g);
239    void spill_reg(int spill_reg);
240    void split_virtual_grfs();
241    void setup_pull_constants();
242    void calculate_live_intervals();
243    bool propagate_constants();
244    bool opt_algebraic();
245    bool opt_cse();
246    bool opt_cse_local(fs_bblock *block, exec_list *aeb);
247    bool opt_copy_propagate();
248    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
249    bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
250 				 exec_list *acp);
251    bool register_coalesce();
252    bool register_coalesce_2();
253    bool compute_to_mrf();
254    bool dead_code_eliminate();
255    bool remove_dead_constants();
256    bool remove_duplicate_mrf_writes();
257    bool virtual_grf_interferes(int a, int b);
258    void schedule_instructions();
259    void fail(const char *msg, ...);
260 
261    void push_force_uncompressed();
262    void pop_force_uncompressed();
263    void push_force_sechalf();
264    void pop_force_sechalf();
265 
266    void generate_code();
267    void generate_fb_write(fs_inst *inst);
268    void generate_pixel_xy(struct brw_reg dst, bool is_x);
269    void generate_linterp(fs_inst *inst, struct brw_reg dst,
270 			 struct brw_reg *src);
271    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
272    void generate_math1_gen7(fs_inst *inst,
273 			    struct brw_reg dst,
274 			    struct brw_reg src);
275    void generate_math2_gen7(fs_inst *inst,
276 			    struct brw_reg dst,
277 			    struct brw_reg src0,
278 			    struct brw_reg src1);
279    void generate_math1_gen6(fs_inst *inst,
280 			    struct brw_reg dst,
281 			    struct brw_reg src);
282    void generate_math2_gen6(fs_inst *inst,
283 			    struct brw_reg dst,
284 			    struct brw_reg src0,
285 			    struct brw_reg src1);
286    void generate_math_gen4(fs_inst *inst,
287 			   struct brw_reg dst,
288 			   struct brw_reg src);
289    void generate_discard(fs_inst *inst);
290    void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
291    void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
292                      bool negate_value);
293    void generate_spill(fs_inst *inst, struct brw_reg src);
294    void generate_unspill(fs_inst *inst, struct brw_reg dst);
295    void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
296 				    struct brw_reg index,
297 				    struct brw_reg offset);
298    void generate_mov_dispatch_to_flags();
299 
300    void emit_dummy_fs();
301    fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
302    fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
303                          glsl_interp_qualifier interpolation_mode,
304                          bool is_centroid);
305    fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
306    fs_reg *emit_general_interpolation(ir_variable *ir);
307    void emit_interpolation_setup_gen4();
308    void emit_interpolation_setup_gen6();
309    fs_reg emit_texcoord(ir_texture *ir, int sampler, int texunit);
310    fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
311 			      fs_reg shadow_comp, fs_reg lod, fs_reg lod2);
312    fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
313 			      fs_reg shadow_comp, fs_reg lod, fs_reg lod2);
314    fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
315 			      fs_reg shadow_comp, fs_reg lod, fs_reg lod2);
316    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
317    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
318    bool try_emit_saturate(ir_expression *ir);
319    bool try_emit_mad(ir_expression *ir, int mul_arg);
320    void emit_bool_to_cond_code(ir_rvalue *condition);
321    void emit_if_gen6(ir_if *ir);
322    void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
323 
324    void emit_color_write(int target, int index, int first_color_mrf);
325    void emit_fb_writes();
326    bool try_rewrite_rhs_to_dst(ir_assignment *ir,
327 			       fs_reg dst,
328 			       fs_reg src,
329 			       fs_inst *pre_rhs_inst,
330 			       fs_inst *last_rhs_inst);
331    void emit_assignment_writes(fs_reg &l, fs_reg &r,
332 			       const glsl_type *type, bool predicated);
333    void resolve_ud_negate(fs_reg *reg);
334    void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
335 
336    struct brw_reg interp_reg(int location, int channel);
337    int setup_uniform_values(int loc, const glsl_type *type);
338    void setup_builtin_uniform_values(ir_variable *ir);
339    int implied_mrf_writes(fs_inst *inst);
340 
341    struct brw_context *brw;
342    const struct gl_fragment_program *fp;
343    struct intel_context *intel;
344    struct gl_context *ctx;
345    struct brw_wm_compile *c;
346    struct brw_compile *p;
347    struct brw_shader *shader;
348    struct gl_shader_program *prog;
349    void *mem_ctx;
350    exec_list instructions;
351 
352    /* Delayed setup of c->prog_data.params[] due to realloc of
353     * ParamValues[] during compile.
354     */
355    int param_index[MAX_UNIFORMS * 4];
356    int param_offset[MAX_UNIFORMS * 4];
357 
358    int *virtual_grf_sizes;
359    int virtual_grf_count;
360    int virtual_grf_array_size;
361    int *virtual_grf_def;
362    int *virtual_grf_use;
363    bool live_intervals_valid;
364 
365    /* This is the map from UNIFORM hw_reg + reg_offset as generated by
366     * the visitor to the packed uniform number after
367     * remove_dead_constants() that represents the actual uploaded
368     * uniform index.
369     */
370    int *params_remap;
371 
372    struct hash_table *variable_ht;
373    ir_variable *frag_depth;
374    fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
375    unsigned output_components[BRW_MAX_DRAW_BUFFERS];
376    fs_reg dual_src_output;
377    int first_non_payload_grf;
378    int max_grf;
379    int urb_setup[FRAG_ATTRIB_MAX];
380 
381    /** @{ debug annotation info */
382    const char *current_annotation;
383    ir_instruction *base_ir;
384    /** @} */
385 
386    bool failed;
387    char *fail_msg;
388 
389    /* Result of last visit() method. */
390    fs_reg result;
391 
392    fs_reg pixel_x;
393    fs_reg pixel_y;
394    fs_reg wpos_w;
395    fs_reg pixel_w;
396    fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
397    fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
398    fs_reg reg_null_cmp;
399 
400    int grf_used;
401 
402    int force_uncompressed_stack;
403    int force_sechalf_stack;
404 
405    class fs_bblock *bblock;
406 };
407 
/* Free functions declared here for use by other files; their definitions
 * are not part of this header.
 *
 * NOTE(review): the first two take an IR instruction list and presumably
 * return whether they changed it; brw_fs_precompile()'s return value is
 * presumably a success flag.  Confirm against the definitions.
 */
bool brw_do_channel_expressions(struct exec_list *instructions);
bool brw_do_vector_splitting(struct exec_list *instructions);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
411