1 /* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #pragma once 29 30 #include "brw_shader.h" 31 32 extern "C" { 33 34 #include <sys/types.h> 35 36 #include "main/macros.h" 37 #include "main/shaderobj.h" 38 #include "main/uniforms.h" 39 #include "program/prog_parameter.h" 40 #include "program/prog_print.h" 41 #include "program/prog_optimize.h" 42 #include "program/register_allocate.h" 43 #include "program/sampler.h" 44 #include "program/hash_table.h" 45 #include "brw_context.h" 46 #include "brw_eu.h" 47 #include "brw_wm.h" 48 } 49 #include "glsl/glsl_types.h" 50 #include "glsl/ir.h" 51 52 class fs_bblock; 53 namespace { 54 class acp_entry; 55 } 56 57 enum register_file { 58 BAD_FILE, 59 ARF, 60 GRF, 61 MRF, 62 IMM, 63 FIXED_HW_REG, /* a struct brw_reg */ 64 UNIFORM, /* prog_data->params[reg] */ 65 }; 66 67 class fs_reg { 68 public: 69 /* Callers of this ralloc-based new need not call delete. It's 70 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ new(size_t size,void * ctx)71 static void* operator new(size_t size, void *ctx) 72 { 73 void *node; 74 75 node = ralloc_size(ctx, size); 76 assert(node != NULL); 77 78 return node; 79 } 80 81 void init(); 82 83 fs_reg(); 84 fs_reg(float f); 85 fs_reg(int32_t i); 86 fs_reg(uint32_t u); 87 fs_reg(struct brw_reg fixed_hw_reg); 88 fs_reg(enum register_file file, int reg); 89 fs_reg(enum register_file file, int reg, uint32_t type); 90 fs_reg(class fs_visitor *v, const struct glsl_type *type); 91 92 bool equals(const fs_reg &r) const; 93 94 /** Register file: ARF, GRF, MRF, IMM. */ 95 enum register_file file; 96 /** 97 * Register number. For ARF/MRF, it's the hardware register. For 98 * GRF, it's a virtual register number until register allocation 99 */ 100 int reg; 101 /** 102 * For virtual registers, this is a hardware register offset from 103 * the start of the register block (for example, a constant index 104 * in an array access). 105 */ 106 int reg_offset; 107 /** Register type. BRW_REGISTER_TYPE_* */ 108 int type; 109 bool negate; 110 bool abs; 111 bool sechalf; 112 struct brw_reg fixed_hw_reg; 113 int smear; /* -1, or a channel of the reg to smear to all channels. */ 114 115 /** Value for file == IMM */ 116 union { 117 int32_t i; 118 uint32_t u; 119 float f; 120 } imm; 121 }; 122 123 static const fs_reg reg_undef; 124 static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 125 static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 126 127 class fs_inst : public exec_node { 128 public: 129 /* Callers of this ralloc-based new need not call delete. It's 130 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ new(size_t size,void * ctx)131 static void* operator new(size_t size, void *ctx) 132 { 133 void *node; 134 135 node = rzalloc_size(ctx, size); 136 assert(node != NULL); 137 138 return node; 139 } 140 141 void init(); 142 143 fs_inst(); 144 fs_inst(enum opcode opcode); 145 fs_inst(enum opcode opcode, fs_reg dst); 146 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0); 147 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1); 148 fs_inst(enum opcode opcode, fs_reg dst, 149 fs_reg src0, fs_reg src1,fs_reg src2); 150 151 bool equals(fs_inst *inst); 152 int regs_written(); 153 bool overwrites_reg(const fs_reg ®); 154 bool is_tex(); 155 bool is_math(); 156 157 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 158 fs_reg dst; 159 fs_reg src[3]; 160 bool saturate; 161 bool predicated; 162 bool predicate_inverse; 163 int conditional_mod; /**< BRW_CONDITIONAL_* */ 164 165 int mlen; /**< SEND message length */ 166 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 167 uint32_t texture_offset; /**< Texture offset bitfield */ 168 int sampler; 169 int target; /**< MRT target. */ 170 bool eot; 171 bool header_present; 172 bool shadow_compare; 173 bool force_uncompressed; 174 bool force_sechalf; 175 uint32_t offset; /* spill/unspill offset */ 176 177 /** @{ 178 * Annotation for the generated IR. One of the two can be set. 179 */ 180 ir_instruction *ir; 181 const char *annotation; 182 /** @} */ 183 }; 184 185 class fs_visitor : public ir_visitor 186 { 187 public: 188 189 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 190 struct brw_shader *shader); 191 ~fs_visitor(); 192 193 fs_reg *variable_storage(ir_variable *var); 194 int virtual_grf_alloc(int size); 195 void import_uniforms(fs_visitor *v); 196 197 void visit(ir_variable *ir); 198 void visit(ir_assignment *ir); 199 void visit(ir_dereference_variable *ir); 200 void visit(ir_dereference_record *ir); 201 void visit(ir_dereference_array *ir); 202 void visit(ir_expression *ir); 203 void visit(ir_texture *ir); 204 void visit(ir_if *ir); 205 void visit(ir_constant *ir); 206 void visit(ir_swizzle *ir); 207 void visit(ir_return *ir); 208 void visit(ir_loop *ir); 209 void visit(ir_loop_jump *ir); 210 void visit(ir_discard *ir); 211 void visit(ir_call *ir); 212 void visit(ir_function *ir); 213 void visit(ir_function_signature *ir); 214 215 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); 216 217 fs_inst *emit(fs_inst inst); 218 219 fs_inst *emit(enum opcode opcode); 220 fs_inst *emit(enum opcode opcode, fs_reg dst); 221 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0); 222 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1); 223 fs_inst *emit(enum opcode opcode, fs_reg dst, 224 fs_reg src0, fs_reg src1, fs_reg src2); 225 226 int type_size(const struct glsl_type *type); 227 fs_inst *get_instruction_generating_reg(fs_inst *start, 228 fs_inst *end, 229 fs_reg reg); 230 231 bool run(); 232 void setup_paramvalues_refs(); 233 void assign_curb_setup(); 234 void calculate_urb_setup(); 235 void assign_urb_setup(); 236 bool assign_regs(); 237 void assign_regs_trivial(); 238 int choose_spill_reg(struct ra_graph *g); 239 void spill_reg(int spill_reg); 240 void split_virtual_grfs(); 241 void setup_pull_constants(); 242 void calculate_live_intervals(); 243 bool propagate_constants(); 244 bool opt_algebraic(); 245 bool opt_cse(); 246 bool opt_cse_local(fs_bblock *block, exec_list *aeb); 247 bool opt_copy_propagate(); 248 bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); 249 bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, 250 exec_list *acp); 251 bool register_coalesce(); 252 bool register_coalesce_2(); 253 bool compute_to_mrf(); 254 bool dead_code_eliminate(); 255 bool remove_dead_constants(); 256 bool remove_duplicate_mrf_writes(); 257 bool virtual_grf_interferes(int a, int b); 258 void schedule_instructions(); 259 void fail(const char *msg, ...); 260 261 void push_force_uncompressed(); 262 void pop_force_uncompressed(); 263 void push_force_sechalf(); 264 void pop_force_sechalf(); 265 266 void generate_code(); 267 void generate_fb_write(fs_inst *inst); 268 void generate_pixel_xy(struct brw_reg dst, bool is_x); 269 void generate_linterp(fs_inst *inst, struct brw_reg dst, 270 struct brw_reg *src); 271 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 272 void generate_math1_gen7(fs_inst *inst, 273 struct brw_reg dst, 274 struct brw_reg src); 275 void generate_math2_gen7(fs_inst *inst, 276 struct brw_reg dst, 277 struct brw_reg src0, 278 struct brw_reg src1); 279 void generate_math1_gen6(fs_inst *inst, 280 struct brw_reg dst, 281 struct brw_reg src); 282 void generate_math2_gen6(fs_inst *inst, 283 struct brw_reg dst, 284 struct brw_reg src0, 285 struct brw_reg src1); 286 void generate_math_gen4(fs_inst *inst, 287 struct brw_reg dst, 288 struct brw_reg src); 289 void generate_discard(fs_inst *inst); 290 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 291 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, 292 bool negate_value); 293 void generate_spill(fs_inst *inst, struct brw_reg src); 294 void generate_unspill(fs_inst *inst, struct brw_reg dst); 295 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst, 296 struct brw_reg index, 297 struct brw_reg offset); 298 void generate_mov_dispatch_to_flags(); 299 300 void emit_dummy_fs(); 301 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 302 fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp, 303 glsl_interp_qualifier interpolation_mode, 304 bool is_centroid); 305 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 306 fs_reg *emit_general_interpolation(ir_variable *ir); 307 void emit_interpolation_setup_gen4(); 308 void emit_interpolation_setup_gen6(); 309 fs_reg emit_texcoord(ir_texture *ir, int sampler, int texunit); 310 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 311 fs_reg shadow_comp, fs_reg lod, fs_reg lod2); 312 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 313 fs_reg shadow_comp, fs_reg lod, fs_reg lod2); 314 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 315 fs_reg shadow_comp, fs_reg lod, fs_reg lod2); 316 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); 317 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); 318 bool try_emit_saturate(ir_expression *ir); 319 bool try_emit_mad(ir_expression *ir, int mul_arg); 320 void emit_bool_to_cond_code(ir_rvalue *condition); 321 void emit_if_gen6(ir_if *ir); 322 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 323 324 void emit_color_write(int target, int index, int first_color_mrf); 325 void emit_fb_writes(); 326 bool try_rewrite_rhs_to_dst(ir_assignment *ir, 327 fs_reg dst, 328 fs_reg src, 329 fs_inst *pre_rhs_inst, 330 fs_inst *last_rhs_inst); 331 void emit_assignment_writes(fs_reg &l, fs_reg &r, 332 const glsl_type *type, bool predicated); 333 void resolve_ud_negate(fs_reg *reg); 334 void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg); 335 336 struct brw_reg interp_reg(int location, int channel); 337 int setup_uniform_values(int loc, const glsl_type *type); 338 void setup_builtin_uniform_values(ir_variable *ir); 339 int implied_mrf_writes(fs_inst *inst); 340 341 struct brw_context *brw; 342 const struct gl_fragment_program *fp; 343 struct intel_context *intel; 344 struct gl_context *ctx; 345 struct brw_wm_compile *c; 346 struct brw_compile *p; 347 struct brw_shader *shader; 348 struct gl_shader_program *prog; 349 void *mem_ctx; 350 exec_list instructions; 351 352 /* Delayed setup of c->prog_data.params[] due to realloc of 353 * ParamValues[] during compile. 354 */ 355 int param_index[MAX_UNIFORMS * 4]; 356 int param_offset[MAX_UNIFORMS * 4]; 357 358 int *virtual_grf_sizes; 359 int virtual_grf_count; 360 int virtual_grf_array_size; 361 int *virtual_grf_def; 362 int *virtual_grf_use; 363 bool live_intervals_valid; 364 365 /* This is the map from UNIFORM hw_reg + reg_offset as generated by 366 * the visitor to the packed uniform number after 367 * remove_dead_constants() that represents the actual uploaded 368 * uniform index. 369 */ 370 int *params_remap; 371 372 struct hash_table *variable_ht; 373 ir_variable *frag_depth; 374 fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; 375 unsigned output_components[BRW_MAX_DRAW_BUFFERS]; 376 fs_reg dual_src_output; 377 int first_non_payload_grf; 378 int max_grf; 379 int urb_setup[FRAG_ATTRIB_MAX]; 380 381 /** @{ debug annotation info */ 382 const char *current_annotation; 383 ir_instruction *base_ir; 384 /** @} */ 385 386 bool failed; 387 char *fail_msg; 388 389 /* Result of last visit() method. */ 390 fs_reg result; 391 392 fs_reg pixel_x; 393 fs_reg pixel_y; 394 fs_reg wpos_w; 395 fs_reg pixel_w; 396 fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 397 fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 398 fs_reg reg_null_cmp; 399 400 int grf_used; 401 402 int force_uncompressed_stack; 403 int force_sechalf_stack; 404 405 class fs_bblock *bblock; 406 }; 407 408 bool brw_do_channel_expressions(struct exec_list *instructions); 409 bool brw_do_vector_splitting(struct exec_list *instructions); 410 bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); 411