1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #ifndef BRW_FS_H
29 #define BRW_FS_H
30 
31 #include "brw_shader.h"
32 #include "brw_ir_fs.h"
33 #include "brw_fs_builder.h"
34 #include "brw_fs_live_variables.h"
35 #include "brw_ir_performance.h"
36 #include "compiler/nir/nir.h"
37 
38 struct bblock_t;
39 namespace {
40    struct acp_entry;
41 }
42 
43 class fs_visitor;
44 
45 namespace brw {
46    /**
47     * Register pressure analysis of a shader.  Estimates how many registers
48     * are live at any point of the program in GRF units.
49     */
50    struct register_pressure {
51       register_pressure(const fs_visitor *v);
52       ~register_pressure();
53 
54       analysis_dependency_class
dependency_classregister_pressure55       dependency_class() const
56       {
57          return (DEPENDENCY_INSTRUCTION_IDENTITY |
58                  DEPENDENCY_INSTRUCTION_DATA_FLOW |
59                  DEPENDENCY_VARIABLES);
60       }
61 
62       bool
validateregister_pressure63       validate(const fs_visitor *) const
64       {
65          /* FINISHME */
66          return true;
67       }
68 
69       unsigned *regs_live_at_ip;
70    };
71 }
72 
73 struct brw_gs_compile;
74 
75 static inline fs_reg
offset(const fs_reg & reg,const brw::fs_builder & bld,unsigned delta)76 offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
77 {
78    return offset(reg, bld.dispatch_width(), delta);
79 }
80 
81 #define UBO_START ((1 << 16) - 4)
82 
83 struct shader_stats {
84    const char *scheduler_mode;
85    unsigned promoted_constants;
86 };
87 
88 /**
89  * The fragment shader front-end.
90  *
91  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
92  */
93 class fs_visitor : public backend_shader
94 {
95 public:
96    fs_visitor(const struct brw_compiler *compiler, void *log_data,
97               void *mem_ctx,
98               const brw_base_prog_key *key,
99               struct brw_stage_prog_data *prog_data,
100               const nir_shader *shader,
101               unsigned dispatch_width,
102               int shader_time_index,
103               const struct brw_vue_map *input_vue_map = NULL);
104    fs_visitor(const struct brw_compiler *compiler, void *log_data,
105               void *mem_ctx,
106               struct brw_gs_compile *gs_compile,
107               struct brw_gs_prog_data *prog_data,
108               const nir_shader *shader,
109               int shader_time_index);
110    void init();
111    ~fs_visitor();
112 
113    fs_reg vgrf(const glsl_type *const type);
114    void import_uniforms(fs_visitor *v);
115 
116    void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
117                                    const fs_reg &dst,
118                                    const fs_reg &surf_index,
119                                    const fs_reg &varying_offset,
120                                    uint32_t const_offset,
121                                    uint8_t alignment);
122    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
123 
124    bool run_fs(bool allow_spilling, bool do_rep_send);
125    bool run_vs();
126    bool run_tcs();
127    bool run_tes();
128    bool run_gs();
129    bool run_cs(bool allow_spilling);
130    void optimize();
131    void allocate_registers(bool allow_spilling);
132    void setup_fs_payload_gen4();
133    void setup_fs_payload_gen6();
134    void setup_vs_payload();
135    void setup_gs_payload();
136    void setup_cs_payload();
137    bool fixup_sends_duplicate_payload();
138    void fixup_3src_null_dest();
139    bool fixup_nomask_control_flow();
140    void assign_curb_setup();
141    void assign_urb_setup();
142    void convert_attr_sources_to_hw_regs(fs_inst *inst);
143    void assign_vs_urb_setup();
144    void assign_tcs_urb_setup();
145    void assign_tes_urb_setup();
146    void assign_gs_urb_setup();
147    bool assign_regs(bool allow_spilling, bool spill_all);
148    void assign_regs_trivial();
149    void calculate_payload_ranges(int payload_node_count,
150                                  int *payload_last_use_ip) const;
151    void split_virtual_grfs();
152    bool compact_virtual_grfs();
153    void assign_constant_locations();
154    bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
155                       unsigned *out_pull_index);
156    void lower_constant_loads();
157    virtual void invalidate_analysis(brw::analysis_dependency_class c);
158    void validate();
159    bool opt_algebraic();
160    bool opt_redundant_discard_jumps();
161    bool opt_cse();
162    bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
163 
164    bool opt_copy_propagation();
165    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
166    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
167    bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
168                                    exec_list *acp);
169    bool opt_drop_redundant_mov_to_flags();
170    bool opt_register_renaming();
171    bool opt_bank_conflicts();
172    bool register_coalesce();
173    bool compute_to_mrf();
174    bool eliminate_find_live_channel();
175    bool dead_code_eliminate();
176    bool remove_duplicate_mrf_writes();
177    bool remove_extra_rounding_modes();
178 
179    void schedule_instructions(instruction_scheduler_mode mode);
180    void insert_gen4_send_dependency_workarounds();
181    void insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
182                                                     fs_inst *inst);
183    void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
184                                                      fs_inst *inst);
185    void vfail(const char *msg, va_list args);
186    void fail(const char *msg, ...);
187    void limit_dispatch_width(unsigned n, const char *msg);
188    void lower_uniform_pull_constant_loads();
189    bool lower_load_payload();
190    bool lower_pack();
191    bool lower_regioning();
192    bool lower_logical_sends();
193    bool lower_integer_multiplication();
194    bool lower_minmax();
195    bool lower_simd_width();
196    bool lower_barycentrics();
197    bool lower_scoreboard();
198    bool lower_sub_sat();
199    bool opt_combine_constants();
200 
201    void emit_dummy_fs();
202    void emit_repclear_shader();
203    void emit_fragcoord_interpolation(fs_reg wpos);
204    fs_reg *emit_frontfacing_interpolation();
205    fs_reg *emit_samplepos_setup();
206    fs_reg *emit_sampleid_setup();
207    fs_reg *emit_samplemaskin_setup();
208    void emit_interpolation_setup_gen4();
209    void emit_interpolation_setup_gen6();
210    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
211    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
212                          const fs_reg &texture,
213                          const fs_reg &texture_handle);
214    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
215    fs_reg resolve_source_modifiers(const fs_reg &src);
216    void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
217                    fs_reg result, fs_reg *op, unsigned fsign_src);
218    void emit_shader_float_controls_execution_mode();
219    bool opt_peephole_sel();
220    bool opt_peephole_predicated_break();
221    bool opt_saturate_propagation();
222    bool opt_cmod_propagation();
223    bool opt_zero_samples();
224 
225    void set_tcs_invocation_id();
226 
227    void emit_nir_code();
228    void nir_setup_outputs();
229    void nir_setup_uniforms();
230    void nir_emit_system_values();
231    void nir_emit_impl(nir_function_impl *impl);
232    void nir_emit_cf_list(exec_list *list);
233    void nir_emit_if(nir_if *if_stmt);
234    void nir_emit_loop(nir_loop *loop);
235    void nir_emit_block(nir_block *block);
236    void nir_emit_instr(nir_instr *instr);
237    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,
238                      bool need_dest);
239    bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
240                               nir_alu_instr *instr);
241    void nir_emit_load_const(const brw::fs_builder &bld,
242                             nir_load_const_instr *instr);
243    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
244                               nir_intrinsic_instr *instr);
245    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
246                                nir_intrinsic_instr *instr);
247    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
248                               nir_intrinsic_instr *instr);
249    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
250                               nir_intrinsic_instr *instr);
251    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
252                               nir_intrinsic_instr *instr);
253    fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
254                                         nir_intrinsic_instr *instr);
255    fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
256                                        nir_intrinsic_instr *instr);
257    fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,
258                                    const fs_reg &addr,
259                                    bool in_dwords);
260    void nir_emit_intrinsic(const brw::fs_builder &bld,
261                            nir_intrinsic_instr *instr);
262    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
263                                nir_intrinsic_instr *instr);
264    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
265                              int op, nir_intrinsic_instr *instr);
266    void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
267                                    int op, nir_intrinsic_instr *instr);
268    void nir_emit_shared_atomic(const brw::fs_builder &bld,
269                                int op, nir_intrinsic_instr *instr);
270    void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
271                                      int op, nir_intrinsic_instr *instr);
272    void nir_emit_global_atomic(const brw::fs_builder &bld,
273                                int op, nir_intrinsic_instr *instr);
274    void nir_emit_global_atomic_float(const brw::fs_builder &bld,
275                                      int op, nir_intrinsic_instr *instr);
276    void nir_emit_texture(const brw::fs_builder &bld,
277                          nir_tex_instr *instr);
278    void nir_emit_jump(const brw::fs_builder &bld,
279                       nir_jump_instr *instr);
280    fs_reg get_nir_src(const nir_src &src);
281    fs_reg get_nir_src_imm(const nir_src &src);
282    fs_reg get_nir_dest(const nir_dest &dest);
283    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
284    fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
285                                           nir_intrinsic_instr *instr);
286    fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
287                                          nir_intrinsic_instr *instr);
288    struct brw_reg get_tcs_output_urb_handle();
289 
290    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
291                      unsigned wr_mask);
292 
293    bool optimize_extract_to_float(nir_alu_instr *instr,
294                                   const fs_reg &result);
295    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
296                                      const fs_reg &result);
297 
298    void emit_alpha_test();
299    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
300                                  fs_reg color1, fs_reg color2,
301                                  fs_reg src0_alpha, unsigned components);
302    void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha);
303    void emit_fb_writes();
304    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
305                                       const fs_reg &dst, unsigned target);
306    void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
307    void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
308                                         unsigned stream_id);
309    void emit_gs_control_data_bits(const fs_reg &vertex_count);
310    void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
311    void emit_gs_vertex(const nir_src &vertex_count_nir_src,
312                        unsigned stream_id);
313    void emit_gs_thread_end();
314    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
315                            unsigned base_offset, const nir_src &offset_src,
316                            unsigned num_components, unsigned first_component);
317    void emit_cs_terminate();
318    fs_reg *emit_cs_work_group_id_setup();
319 
320    void emit_barrier();
321 
322    void emit_shader_time_begin();
323    void emit_shader_time_end();
324    void SHADER_TIME_ADD(const brw::fs_builder &bld,
325                         int shader_time_subindex,
326                         fs_reg value);
327 
328    fs_reg get_timestamp(const brw::fs_builder &bld);
329 
330    fs_reg interp_reg(int location, int channel);
331 
332    virtual void dump_instructions() const;
333    virtual void dump_instructions(const char *name) const;
334    void dump_instruction(const backend_instruction *inst) const;
335    void dump_instruction(const backend_instruction *inst, FILE *file) const;
336 
337    const brw_base_prog_key *const key;
338    const struct brw_sampler_prog_key_data *key_tex;
339 
340    struct brw_gs_compile *gs_compile;
341 
342    struct brw_stage_prog_data *prog_data;
343 
344    const struct brw_vue_map *input_vue_map;
345 
346    brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;
347    brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
348    brw_analysis<brw::performance, fs_visitor> performance_analysis;
349 
350    /** Number of uniform variable components visited. */
351    unsigned uniforms;
352 
353    /** Byte-offset for the next available spot in the scratch space buffer. */
354    unsigned last_scratch;
355 
356    /**
357     * Array mapping UNIFORM register numbers to the pull parameter index,
358     * or -1 if this uniform register isn't being uploaded as a pull constant.
359     */
360    int *pull_constant_loc;
361 
362    /**
363     * Array mapping UNIFORM register numbers to the push parameter index,
364     * or -1 if this uniform register isn't being uploaded as a push constant.
365     */
366    int *push_constant_loc;
367 
368    fs_reg subgroup_id;
369    fs_reg group_size[3];
370    fs_reg scratch_base;
371    fs_reg frag_depth;
372    fs_reg frag_stencil;
373    fs_reg sample_mask;
374    fs_reg outputs[VARYING_SLOT_MAX];
375    fs_reg dual_src_output;
376    int first_non_payload_grf;
377    /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
378    unsigned max_grf;
379 
380    fs_reg *nir_locals;
381    fs_reg *nir_ssa_values;
382    fs_reg *nir_system_values;
383 
384    bool failed;
385    char *fail_msg;
386 
387    /** Register numbers for thread payload fields. */
388    struct thread_payload {
389       uint8_t subspan_coord_reg[2];
390       uint8_t source_depth_reg[2];
391       uint8_t source_w_reg[2];
392       uint8_t aa_dest_stencil_reg[2];
393       uint8_t dest_depth_reg[2];
394       uint8_t sample_pos_reg[2];
395       uint8_t sample_mask_in_reg[2];
396       uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
397       uint8_t local_invocation_id_reg[2];
398 
399       /** The number of thread payload registers the hardware will supply. */
400       uint8_t num_regs;
401    } payload;
402 
403    bool source_depth_to_render_target;
404    bool runtime_check_aads_emit;
405 
406    fs_reg pixel_x;
407    fs_reg pixel_y;
408    fs_reg wpos_w;
409    fs_reg pixel_w;
410    fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
411    fs_reg shader_start_time;
412    fs_reg final_gs_vertex_count;
413    fs_reg control_data_bits;
414    fs_reg invocation_id;
415 
416    unsigned grf_used;
417    bool spilled_any_registers;
418 
419    const unsigned dispatch_width; /**< 8, 16 or 32 */
420    unsigned max_dispatch_width;
421 
422    int shader_time_index;
423 
424    struct shader_stats shader_stats;
425 
426    brw::fs_builder bld;
427 
428 private:
429    fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,
430                                               nir_alu_instr *instr,
431                                               fs_reg *op,
432                                               bool need_dest);
433 
434    void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
435                              fs_reg *op);
436    void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
437    void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
438    void lower_mulh_inst(fs_inst *inst, bblock_t *block);
439 
440    unsigned workgroup_size() const;
441 };
442 
443 /**
444  * Return the flag register used in fragment shaders to keep track of live
445  * samples.  On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
446  * dispatch mode, while earlier generations are constrained to f0.1, which
447  * limits the dispatch width to SIMD16 for fragment shaders that use discard.
448  */
449 static inline unsigned
sample_mask_flag_subreg(const fs_visitor * shader)450 sample_mask_flag_subreg(const fs_visitor *shader)
451 {
452    assert(shader->stage == MESA_SHADER_FRAGMENT);
453    return shader->devinfo->gen >= 7 ? 2 : 1;
454 }
455 
456 /**
457  * The fragment shader code generator.
458  *
459  * Translates FS IR to actual i965 assembly code.
460  */
461 class fs_generator
462 {
463 public:
464    fs_generator(const struct brw_compiler *compiler, void *log_data,
465                 void *mem_ctx,
466                 struct brw_stage_prog_data *prog_data,
467                 bool runtime_check_aads_emit,
468                 gl_shader_stage stage);
469    ~fs_generator();
470 
471    void enable_debug(const char *shader_name);
472    int generate_code(const cfg_t *cfg, int dispatch_width,
473                      struct shader_stats shader_stats,
474                      const brw::performance &perf,
475                      struct brw_compile_stats *stats);
476    void add_const_data(void *data, unsigned size);
477    const unsigned *get_assembly();
478 
479 private:
480    void fire_fb_write(fs_inst *inst,
481                       struct brw_reg payload,
482                       struct brw_reg implied_header,
483                       GLuint nr);
484    void generate_send(fs_inst *inst,
485                       struct brw_reg dst,
486                       struct brw_reg desc,
487                       struct brw_reg ex_desc,
488                       struct brw_reg payload,
489                       struct brw_reg payload2);
490    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
491    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
492                          struct brw_reg payload);
493    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
494    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
495    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
496    void generate_barrier(fs_inst *inst, struct brw_reg src);
497    bool generate_linterp(fs_inst *inst, struct brw_reg dst,
498 			 struct brw_reg *src);
499    void generate_tex(fs_inst *inst, struct brw_reg dst,
500                      struct brw_reg surface_index,
501                      struct brw_reg sampler_index);
502    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
503                                  struct brw_reg src,
504                                  struct brw_reg surf_index);
505    void generate_ddx(const fs_inst *inst,
506                      struct brw_reg dst, struct brw_reg src);
507    void generate_ddy(const fs_inst *inst,
508                      struct brw_reg dst, struct brw_reg src);
509    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
510    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
511    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
512    void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
513    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
514                                             struct brw_reg index,
515                                             struct brw_reg offset);
516    void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
517                                                  struct brw_reg dst,
518                                                  struct brw_reg surf_index,
519                                                  struct brw_reg payload);
520    void generate_varying_pull_constant_load_gen4(fs_inst *inst,
521                                                  struct brw_reg dst,
522                                                  struct brw_reg index);
523    void generate_mov_dispatch_to_flags(fs_inst *inst);
524 
525    void generate_pixel_interpolator_query(fs_inst *inst,
526                                           struct brw_reg dst,
527                                           struct brw_reg src,
528                                           struct brw_reg msg_data,
529                                           unsigned msg_type);
530 
531    void generate_set_sample_id(fs_inst *inst,
532                                struct brw_reg dst,
533                                struct brw_reg src0,
534                                struct brw_reg src1);
535 
536    void generate_discard_jump(fs_inst *inst);
537 
538    void generate_pack_half_2x16_split(fs_inst *inst,
539                                       struct brw_reg dst,
540                                       struct brw_reg x,
541                                       struct brw_reg y);
542 
543    void generate_shader_time_add(fs_inst *inst,
544                                  struct brw_reg payload,
545                                  struct brw_reg offset,
546                                  struct brw_reg value);
547 
548    void generate_mov_indirect(fs_inst *inst,
549                               struct brw_reg dst,
550                               struct brw_reg reg,
551                               struct brw_reg indirect_byte_offset);
552 
553    void generate_shuffle(fs_inst *inst,
554                          struct brw_reg dst,
555                          struct brw_reg src,
556                          struct brw_reg idx);
557 
558    void generate_quad_swizzle(const fs_inst *inst,
559                               struct brw_reg dst, struct brw_reg src,
560                               unsigned swiz);
561 
562    bool patch_discard_jumps_to_fb_writes();
563 
564    const struct brw_compiler *compiler;
565    void *log_data; /* Passed to compiler->*_log functions */
566 
567    const struct gen_device_info *devinfo;
568 
569    struct brw_codegen *p;
570    struct brw_stage_prog_data * const prog_data;
571 
572    unsigned dispatch_width; /**< 8, 16 or 32 */
573 
574    exec_list discard_halt_patches;
575    bool runtime_check_aads_emit;
576    bool debug_flag;
577    const char *shader_name;
578    gl_shader_stage stage;
579    void *mem_ctx;
580 };
581 
582 namespace brw {
583    inline fs_reg
584    fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
585                      brw_reg_type type = BRW_REGISTER_TYPE_F)
586    {
587       if (!regs[0])
588          return fs_reg();
589 
590       if (bld.dispatch_width() > 16) {
591          const fs_reg tmp = bld.vgrf(type);
592          const brw::fs_builder hbld = bld.exec_all().group(16, 0);
593          const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
594          fs_reg *const components = new fs_reg[m];
595 
596          for (unsigned g = 0; g < m; g++)
597                components[g] = retype(brw_vec8_grf(regs[g], 0), type);
598 
599          hbld.LOAD_PAYLOAD(tmp, components, m, 0);
600 
601          delete[] components;
602          return tmp;
603 
604       } else {
605          return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
606       }
607    }
608 
609    inline fs_reg
fetch_barycentric_reg(const brw::fs_builder & bld,uint8_t regs[2])610    fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
611    {
612       if (!regs[0])
613          return fs_reg();
614 
615       const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
616       const brw::fs_builder hbld = bld.exec_all().group(8, 0);
617       const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
618       fs_reg *const components = new fs_reg[2 * m];
619 
620       for (unsigned c = 0; c < 2; c++) {
621          for (unsigned g = 0; g < m; g++)
622             components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
623                                            hbld, c + 2 * (g % 2));
624       }
625 
626       hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
627 
628       delete[] components;
629       return tmp;
630    }
631 
632    bool
633    lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
634 }
635 
636 void shuffle_from_32bit_read(const brw::fs_builder &bld,
637                              const fs_reg &dst,
638                              const fs_reg &src,
639                              uint32_t first_component,
640                              uint32_t components);
641 
642 fs_reg setup_imm_df(const brw::fs_builder &bld,
643                     double v);
644 
645 fs_reg setup_imm_b(const brw::fs_builder &bld,
646                    int8_t v);
647 
648 fs_reg setup_imm_ub(const brw::fs_builder &bld,
649                    uint8_t v);
650 
651 enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
652                                                nir_intrinsic_op op);
653 
654 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
655                                   const struct brw_wm_prog_data *prog_data);
656 
657 void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
658 
659 #endif /* BRW_FS_H */
660