/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_visitor.cpp
 *
 * This file supports generating the FS LIR from the GLSL IR.  The LIR
 * makes it easier to do backend-specific optimizations than doing so
 * in the GLSL IR or in the native code.
 */
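/* A rough sketch of the flow (illustrative, not exhaustive): each visit()
 * method walks one GLSL IR node, appends fs_inst instructions to
 * this->instructions, and leaves the register holding the node's value in
 * this->result.  For example, after the channel-expression and
 * scalarization passes, a GLSL statement like
 *
 *    x = a + b;
 *
 * arrives as an ir_assignment whose RHS is an ir_binop_add, and lowers to
 * one "add x, a, b" per scalar channel.
 */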
extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
}
#include "brw_shader.h"
#include "brw_fs.h"
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
#include "glsl/ir_print_visitor.h"

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   } else if (ir->mode == ir_var_out) {
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

      if (ir->index > 0) {
         assert(ir->location == FRAG_RESULT_DATA0);
         assert(ir->index == 1);
         this->dual_src_output = *reg;
      } else if (ir->location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
            this->outputs[i] = *reg;
            this->output_components[i] = 4;
         }
      } else if (ir->location == FRAG_RESULT_DEPTH) {
         this->frag_depth = ir;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(ir->location >= FRAG_RESULT_DATA0 &&
                ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         int vector_elements =
            ir->type->is_array() ? ir->type->fields.array->vector_elements
                                 : ir->type->vector_elements;

         /* General color output. */
         for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
            int output = ir->location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = *reg;
            this->outputs[output].reg_offset += vector_elements * i;
            this->output_components[output] = vector_elements;
         }
      }
   } else if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      /* Thanks to the lower_ubo_reference pass, we will see only
       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
       * variables, so no need for them to be in variable_ht.
       */
      if (ir->uniform_block != -1)
         return;

      if (c->dispatch_width == 16) {
         if (!variable_storage(ir)) {
            fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
         }
         return;
      }

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
      reg->type = brw_type_for_base_type(ir->type);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM || this->result.file == GRF);
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

/* Instruction selection: Produce a MOV.sat instead of
 * MIN(MAX(val, 0), 1) when possible.
 */
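/* For example (illustrative only): GLSL's clamp(x, 0.0, 1.0) reaches us
 * as min(max(x, 0.0), 1.0).  Rather than emitting a CMP/SEL pair per
 * operation, we either set the saturate modifier on the instruction that
 * produced x, or emit a single
 *
 *    mov.sat dst, x
 */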
bool
fs_visitor::try_emit_saturate(ir_expression *ir)
{
   ir_rvalue *sat_val = ir->as_rvalue_to_saturate();

   if (!sat_val)
      return false;

   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();

   sat_val->accept(this);
   fs_reg src = this->result;

   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();

   /* If the last instruction from our accept() didn't generate our
    * src, generate a saturated MOV
    */
   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
   if (!modify || modify->regs_written() != 1) {
      this->result = fs_reg(this, ir->type);
      fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
      inst->saturate = true;
   } else {
      modify->saturate = true;
      this->result = src;
   }

   return true;
}

bool
fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
{
   /* 3-src instructions were introduced in gen6. */
   if (intel->gen < 6)
      return false;

   /* MAD can only handle floating-point data. */
   if (ir->type != glsl_type::float_type)
      return false;

   ir_rvalue *nonmul = ir->operands[1 - mul_arg];
   ir_expression *mul = ir->operands[mul_arg]->as_expression();

   if (!mul || mul->operation != ir_binop_mul)
      return false;

   if (nonmul->as_constant() ||
       mul->operands[0]->as_constant() ||
       mul->operands[1]->as_constant())
      return false;

   nonmul->accept(this);
   fs_reg src0 = this->result;

   mul->operands[0]->accept(this);
   fs_reg src1 = this->result;

   mul->operands[1]->accept(this);
   fs_reg src2 = this->result;

   this->result = fs_reg(this, ir->type);
   emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);

   return true;
}
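/* A sketch of the payoff: for float d = a + b * c, instead of
 *
 *    mul t, b, c
 *    add d, a, t
 *
 * we emit the fused three-source form with the addend as src0:
 *
 *    mad d, a, b, c
 *
 * Constant operands are rejected above because the three-source encoding
 * cannot take immediates.
 */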

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_inst *inst;

   assert(ir->get_num_operands() <= 2);

   if (try_emit_saturate(ir))
      return;
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
         return;
   }

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         fail("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is the
       * one's complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f));
      inst->predicated = true;

      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(FS_OPCODE_DDX, this->result, op[0]);
      break;
   case ir_unop_dFdy:
      emit(FS_OPCODE_DDY, this->result, op[0]);
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, this->result, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits
          * of one of the operands (src0 on gen6, src1 on gen7).  The
          * MACH accumulates in the contribution of the upper 16 bits
          * of that operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
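         /* Conceptually (a sketch of the arithmetic, not emitted code),
          * the full 32-bit result is assembled from 16-bit partial
          * products:
          *
          *    a * b = a * lo16(b) + ((a * hi16(b)) << 16)
          *
          * The MUL below computes the first term, MACH folds in the
          * second via the accumulator, and the final MOV copies the
          * accumulated low 32 bits into the destination.
          */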
         if (intel->gen >= 7 && c->dispatch_width == 16)
            fail("16-wide explicit accumulator operands unsupported\n");

         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
         emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
         emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
      } else {
         emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
      }
      break;
   case ir_binop_div:
      if (intel->gen >= 7 && c->dispatch_width == 16)
         fail("16-wide INTDIV unsupported\n");

      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
      break;
   case ir_binop_mod:
      if (intel->gen >= 7 && c->dispatch_width == 16)
         fail("16-wide INTDIV unsupported\n");

      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_all_equal:
   case ir_binop_nequal:
   case ir_binop_any_nequal:
      temp = this->result;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      resolve_ud_negate(&op[0]);
      resolve_ud_negate(&op[1]);

      resolve_bool_comparison(ir->operands[0], &op[0]);
      resolve_bool_comparison(ir->operands[1], &op[1]);

      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
      break;

   case ir_binop_dot:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
      break;

   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      op[0].type = BRW_REGISTER_TYPE_F;
      this->result = op[0];
      break;
   case ir_unop_i2u:
   case ir_unop_bitcast_f2u:
      op[0].type = BRW_REGISTER_TYPE_UD;
      this->result = op[0];
      break;
   case ir_unop_u2i:
   case ir_unop_bitcast_f2i:
      op[0].type = BRW_REGISTER_TYPE_D;
      this->result = op[0];
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
      emit(BRW_OPCODE_MOV, this->result, op[0]);
      break;

   case ir_unop_b2i:
      inst = emit(BRW_OPCODE_AND, this->result, op[0], fs_reg(1));
      break;
   case ir_unop_b2f:
      temp = fs_reg(this, glsl_type::int_type);
      emit(BRW_OPCODE_AND, temp, op[0], fs_reg(1));
      emit(BRW_OPCODE_MOV, this->result, temp);
      break;

   case ir_unop_f2b:
      inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
      break;
   case ir_unop_i2b:
      assert(op[0].type == BRW_REGISTER_TYPE_D);

      inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
      break;

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, this->result, op[0]);
      break;
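   /* ceil() below relies on the identity ceil(x) = -floor(-x): RNDD
    * rounds toward minus infinity, so the source is negated on the way in
    * and the result is marked negated on the way out.
    */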
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, this->result, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, this->result, op[0]);
      break;

   case ir_binop_min:
      resolve_ud_negate(&op[0]);
      resolve_ud_negate(&op[1]);

      if (intel->gen >= 6) {
         inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_L;
      } else {
         /* Unalias the destination */
         this->result = fs_reg(this, ir->type);

         inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_L;

         inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
         inst->predicated = true;
      }
      break;
   case ir_binop_max:
      resolve_ud_negate(&op[0]);
      resolve_ud_negate(&op[1]);

      if (intel->gen >= 6) {
         inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_GE;
      } else {
         /* Unalias the destination */
         this->result = fs_reg(this, ir->type);

         inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_G;

         inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
         inst->predicated = true;
      }
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, this->result, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
      break;

   case ir_binop_lshift:
      inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]);
      break;

   case ir_binop_rshift:
      if (ir->type->base_type == GLSL_TYPE_INT)
         inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]);
      else
         inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]);
      break;

   case ir_binop_ubo_load:
      ir_constant *uniform_block = ir->operands[0]->as_constant();
      ir_constant *offset = ir->operands[1]->as_constant();

      fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
      packed_consts.type = result.type;
      fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0]));
      fs_inst *pull = emit(fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD,
                                   packed_consts,
                                   surf_index,
                                   fs_reg(offset->value.u[0])));
      pull->base_mrf = 14;
      pull->mlen = 1;

      packed_consts.smear = offset->value.u[0] % 16 / 4;
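      /* The pull load fetches an aligned block of constants; smear selects
       * the starting dword within it.  E.g. (an illustrative value) a byte
       * offset of 20 gives 20 % 16 / 4 == 1, the second dword of the block.
       */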
      for (int i = 0; i < ir->type->vector_elements; i++) {
         /* UBO bools are any nonzero value.  We consider bools to be
          * values with the low bit set to 1.  Convert them using CMP.
          */
         if (ir->type->base_type == GLSL_TYPE_BOOL) {
            fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, result,
                                         packed_consts, fs_reg(0u)));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         } else {
            emit(fs_inst(BRW_OPCODE_MOV, result, packed_consts));
         }

         packed_consts.smear++;
         result.reg_offset++;

         /* The std140 packing rules don't allow vectors to cross 16-byte
          * boundaries, and a reg is 32 bytes.
          */
         assert(packed_consts.smear < 8);
      }
      result.reg_offset = 0;
      break;
   }
}

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
                                   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
         l.type = brw_type_for_base_type(type);
         r.type = brw_type_for_base_type(type);

         if (predicated || !l.equals(r)) {
            fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
            inst->predicated = predicated;
         }

         l.reg_offset++;
         r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.structure[i].type,
                                predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

/* If the RHS processing resulted in an instruction generating a
 * temporary value, and it would be easy to rewrite the instruction to
 * generate its result right into the LHS instead, do so.  This ends
 * up reliably removing instructions where it can be tricky to do so
 * later without real UD chain information.
 */
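/* A minimal sketch of the rewrite: for an unconditional full write such as
 * "x = a + b", the RHS leaves
 *
 *    add temp, a, b
 *
 * as the last emitted instruction, and rather than appending
 * "mov x, temp" we retarget the ADD's destination to x directly.
 */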
bool
fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                   fs_reg dst,
                                   fs_reg src,
                                   fs_inst *pre_rhs_inst,
                                   fs_inst *last_rhs_inst)
{
   /* Only attempt if we're doing a direct assignment. */
   if (ir->condition ||
       !(ir->lhs->type->is_scalar() ||
        (ir->lhs->type->is_vector() &&
         ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
      return false;

   /* Make sure the last instruction generated our source reg. */
   fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
                                                    last_rhs_inst,
                                                    src);
   if (!modify)
      return false;

   /* If last_rhs_inst wrote a different number of components than our LHS,
    * we can't safely rewrite it.
    */
   if (ir->lhs->type->vector_elements != modify->regs_written())
      return false;

   /* Success!  Rewrite the instruction. */
   modify->dst = dst;

   return true;
}

void
fs_visitor::visit(ir_assignment *ir)
{
   fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();

   ir->rhs->accept(this);
   r = this->result;

   fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
      return;

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
         if (ir->write_mask & (1 << i)) {
            inst = emit(BRW_OPCODE_MOV, l, r);
            if (ir->condition)
               inst->predicated = true;
            r.reg_offset++;
         }
         l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg dPdy)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
         mlen++;
      } else if (ir->op == ir_txb || ir->op == ir_txl) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
         mlen++;
      } else {
         assert(!"Should not get here.");
      }

      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c);
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else if (ir->op == ir_txd) {
      fs_reg &dPdx = lod;

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
         coordinate.reg_offset++;
      }
      /* the slots for u and v are always present, but r is optional */
      mlen += MAX2(ir->coordinate->type->vector_elements, 2);

      /*  P   = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * 1-arg: Does not exist.
       *
       * 2-arg: dudx   dvdx   dudy   dvdy
       *        dPdx.x dPdx.y dPdy.x dPdy.y
       *        m4     m5     m6     m7
       *
       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
       *        m5     m6     m7     m8     m9     m10
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
         dPdx.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);

      for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
         dPdy.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
   } else if (ir->op == ir_txs) {
      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
      simd16 = true;
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
      mlen += 2;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      simd16 = true;
      assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
              coordinate);
         coordinate.reg_offset++;
      }

      /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
       * be necessary for TXF (ld), but seems wise to do for all messages.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, lod.type), lod);
      mlen++;

      /* The unused upper half. */
      mlen++;
   }

   if (simd16) {
      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
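      /* As a sketch of the layout: an RGBA result comes back as
       * r, r?, g, g?, b, b?, a, a?, where the "?" halves carry nothing
       * useful; the MOV loop after the SEND copies every other slot into
       * orig_dst.
       */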
      orig_dst = dst;
      const glsl_type *vec_type =
         glsl_type::get_instance(ir->type->base_type, 4, 1);
      dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
      dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
                               : BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd:
      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   case ir_txs:
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
         emit(BRW_OPCODE_MOV, orig_dst, dst);
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}

/* gen5's sampler has slots for u, v, r, array index, then optional
 * parameters like shadow comparator or LOD bias.  If optional
 * parameters aren't present, those base slots are optional and don't
 * need to be included in the message.
 *
 * Regardless, we don't fill in the unused slots, which may look
 * surprising in the disassembly.
 */
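/* As an illustrative example of the packing: a SIMD8 sample_l from a 2D
 * texture with no header puts u in m2 and v in m3, but the LOD lands in
 * m6, since once a later parameter is present the unused r and
 * array-index slots still have to be skipped over.
 */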
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = c->dispatch_width / 8;
   bool header_present = false;
   const int vector_elements =
      ir->coordinate ? ir->coordinate->type->vector_elements : 0;

   if (ir->offset != NULL && ir->op == ir_txf) {
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      ir_constant *offset = ir->offset->as_constant();
      for (int i = 0; i < vector_elements; i++) {
         emit(BRW_OPCODE_ADD,
              fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
              coordinate,
              offset->value.i[i]);
         coordinate.reg_offset++;
      }
   } else {
      if (ir->offset) {
         /* The offsets set up by the ir_texture visitor are in the
          * m1 header, so we can't go headerless.
          */
         header_present = true;
         mlen++;
         base_mrf--;
      }

      for (int i = 0; i < vector_elements; i++) {
         emit(BRW_OPCODE_MOV,
              fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
              coordinate);
         coordinate.reg_offset++;
      }
   }
   mlen += vector_elements * reg_width;

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, header_present + 4 * reg_width);

      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c);
      mlen += reg_width;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
      mlen += reg_width;

      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
      mlen += reg_width;

      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd: {
      mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */

      /**
       *  P   =  u,    v,    r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * Load up these values:
       * - dudx   dudy   dvdx   dvdy   drdx   drdy
       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
         lod.reg_offset++;
         mlen += reg_width;

         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod2);
         lod2.reg_offset++;
         mlen += reg_width;
      }

      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   }
   case ir_txs:
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      mlen = header_present + 4 * reg_width;

      emit(BRW_OPCODE_MOV,
           fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD),
           lod);
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = c->dispatch_width / 8;
   bool header_present = false;
   int offsets[3];

   if (ir->offset && ir->op != ir_txf) {
      /* The offsets set up by the ir_texture visitor are in the
       * m1 header, so we can't go headerless.
       */
      header_present = true;
      mlen++;
      base_mrf--;
   }

   if (ir->shadow_comparitor) {
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c);
      mlen += reg_width;
   }

   /* Set up the LOD info */
   switch (ir->op) {
   case ir_tex:
      break;
   case ir_txb:
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
      mlen += reg_width;
      break;
   case ir_txl:
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
      mlen += reg_width;
      break;
   case ir_txd: {
      if (c->dispatch_width == 16)
         fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");

      /* Load dPdx and the coordinate together:
       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate);
         coordinate.reg_offset++;
         mlen += reg_width;

         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod);
         lod.reg_offset++;
         mlen += reg_width;

         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod2);
         lod2.reg_offset++;
         mlen += reg_width;
      }
      break;
   }
   case ir_txs:
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
      mlen += reg_width;
      break;
   case ir_txf:
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      if (ir->offset) {
         ir_constant *offset = ir->offset->as_constant();
         offsets[0] = offset->value.i[0];
         offsets[1] = offset->value.i[1];
         offsets[2] = offset->value.i[2];
      } else {
         memset(offsets, 0, sizeof(offsets));
      }

      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
      emit(BRW_OPCODE_ADD,
           fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[0]);
      coordinate.reg_offset++;
      mlen += reg_width;

      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod);
      mlen += reg_width;

      for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_ADD,
              fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[i]);
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   }

   /* Set up the coordinate (except for cases where it was done above) */
   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate);
         coordinate.reg_offset++;
         mlen += reg_width;
      }
   }

   /* Generate the SEND */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

/**
 * Emit code to produce the coordinates for a texture lookup.
 *
 * Returns the fs_reg containing the texture coordinate (as opposed to
 * setting this->result).
 */
fs_reg
fs_visitor::emit_texcoord(ir_texture *ir, int sampler, int texunit)
{
   fs_inst *inst = NULL;

   if (!ir->coordinate)
      return fs_reg(); /* Return the default BAD_FILE register. */

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   bool needs_gl_clamp = true;

   fs_reg scale_x, scale_y;

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT &&
       (intel->gen < 6 ||
        (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
                             c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         texunit,
         0,
         0
      };

      if (c->dispatch_width == 16) {
         fail("rectangle scale uniform setup not supported on 16-wide\n");
         return fs_reg(this, ir->type);
      }

      scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);

      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);

      this->param_index[c->prog_data.nr_params] = index;
      this->param_offset[c->prog_data.nr_params] = 0;
      c->prog_data.nr_params++;
      this->param_index[c->prog_data.nr_params] = index;
      this->param_offset[c->prog_data.nr_params] = 1;
      c->prog_data.nr_params++;
   }

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (intel->gen < 6 &&
       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(BRW_OPCODE_MUL, dst, src, scale_x);
      dst.reg_offset++;
      src.reg_offset++;
      emit(BRW_OPCODE_MUL, dst, src, scale_y);
   } else if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      /* On gen6+, the sampler handles the rectangle coordinates
       * natively, without needing rescaling.  But that means we have
       * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
       * not [0, 1] like the default case below.
       */
      needs_gl_clamp = false;

      for (int i = 0; i < 2; i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
            inst->conditional_mod = BRW_CONDITIONAL_G;

            /* Our parameter comes in as 1.0/width or 1.0/height,
             * because that's what people normally want for doing
             * texture rectangle handling.  We need width or height
             * for clamping, but we don't care enough to make a new
             * parameter type, so just invert back.
             */
            fs_reg limit = fs_reg(this, glsl_type::float_type);
            emit(BRW_OPCODE_MOV, limit, i == 0 ? scale_x : scale_y);
            emit(SHADER_OPCODE_RCP, limit, limit);

            inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
            inst->conditional_mod = BRW_CONDITIONAL_L;
         }
      }
   }

   if (ir->coordinate && needs_gl_clamp) {
      for (unsigned int i = 0;
           i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            fs_inst *inst = emit(BRW_OPCODE_MOV, chan, chan);
            inst->saturate = true;
         }
      }
   }
   return coordinate;
}

void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base);
   int texunit = fp->Base.SamplerUnits[sampler];

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Generate code to compute all the subexpression trees.  This has to be
    * done before loading any values into MRFs for the sampler message since
    * generating these values may involve SEND messages that need the MRFs.
    */
   fs_reg coordinate = emit_texcoord(ir, sampler, texunit);

   fs_reg shadow_comparitor;
   if (ir->shadow_comparitor) {
      ir->shadow_comparitor->accept(this);
      shadow_comparitor = this->result;
   }

   fs_reg lod, lod2;
   switch (ir->op) {
   case ir_tex:
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      lod = this->result;
      break;
   case ir_txd:
      ir->lod_info.grad.dPdx->accept(this);
      lod = this->result;

      ir->lod_info.grad.dPdy->accept(this);
      lod2 = this->result;
      break;
   case ir_txf:
   case ir_txl:
   case ir_txs:
      ir->lod_info.lod->accept(this);
      lod = this->result;
      break;
   }

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));

   if (intel->gen >= 7) {
      inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2);
   } else if (intel->gen >= 5) {
      inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2);
   } else {
      inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2);
   }

   /* The header is set up by generate_tex() when necessary. */
   inst->src[0] = reg_undef;

   if (ir->offset != NULL && ir->op != ir_txf)
      inst->texture_offset = brw_texture_offset(ir->offset->as_constant());

   inst->sampler = sampler;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   swizzle_result(ir, dst, sampler);
}

/**
 * Swizzle the result of a texture lookup.  This is necessary for
 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
 */
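/* For example (illustrative only): with a swizzle of (B, G, R, ONE), the
 * loop below emits MOVs pulling components 2, 1, and 0 of the sampled
 * result into the first three channels, and a MOV of the immediate 1.0f
 * for the last.
 */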
void
fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
{
   this->result = orig_val;

   if (ir->op == ir_txs)
      return;

   if (ir->type == glsl_type::float_type) {
      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
      assert(ir->sampler->type->sampler_shadow);
   } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) {
      fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i);
         fs_reg l = swizzled_result;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
         } else if (swiz == SWIZZLE_ONE) {
            emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
         } else {
            fs_reg r = orig_val;
            r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i);
            emit(BRW_OPCODE_MOV, l, r);
         }
      }
      this->result = swizzled_result;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(BRW_OPCODE_MOV, result, channel);
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   emit(FS_OPCODE_DISCARD);
}

void
fs_visitor::visit(ir_constant *ir)
{
   /* Set this->result to reg at the bottom of the function because some code
    * paths will cause this visitor to be applied to other fields.  This will
    * cause the value stored in this->result to be modified.
    *
    * Make reg constant so that it doesn't get accidentally modified along the
    * way.  Yes, I actually had this problem. :(
    */
   const fs_reg reg(this, ir->type);
   fs_reg dst_reg = reg;

   if (ir->type->is_array()) {
      const unsigned size = type_size(ir->type->fields.array);

      for (unsigned i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, dst_reg, src_reg);
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else if (ir->type->is_record()) {
      foreach_list(node, &ir->components) {
         ir_constant *const field = (ir_constant *) node;
         const unsigned size = type_size(field->type);

         field->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, dst_reg, src_reg);
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else {
      const unsigned size = type_size(ir->type);

      for (unsigned i = 0; i < size; i++) {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
            break;
         case GLSL_TYPE_UINT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
            break;
         case GLSL_TYPE_INT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
            break;
         case GLSL_TYPE_BOOL:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
            break;
         default:
            assert(!"Non-float/uint/int/bool constant");
         }
         dst_reg.reg_offset++;
      }
   }

   this->result = reg;
}

void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;

         resolve_ud_negate(&op[i]);
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
      case ir_binop_logic_or:
      case ir_binop_logic_and:
         goto out;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         resolve_bool_comparison(expr->operands[0], &op[0]);
         resolve_bool_comparison(expr->operands[1], &op[1]);

         inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         fail("bad cond code\n");
         break;
      }
      return;
   }

out:
   ir->accept(this);

   fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
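/* A sketch of the win: for "if (a < b)", the general path emits a CMP to
 * set the flag register followed by a predicated IF, while the gen6 path
 * below collapses that into a single
 *
 *    if.l null, a, b
 *
 * with the comparison in the IF's conditional modifier.
 */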
void
fs_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;
      fs_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
      case ir_binop_logic_xor:
      case ir_binop_logic_or:
      case ir_binop_logic_and:
         /* For operations on bool arguments, only the low bit of the bool is
          * valid, and the others are undefined.  Fall back to the condition
          * code path.
          */
         break;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         resolve_bool_comparison(expr->operands[0], &op[0]);
         resolve_bool_comparison(expr->operands[1], &op[1]);

         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;
      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         fail("bad condition\n");
         return;
      }
   }

   emit_bool_to_cond_code(ir->condition);
   fs_inst *inst = emit(BRW_OPCODE_IF);
   inst->predicated = true;
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   if (intel->gen < 6 && c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);

      inst = emit(BRW_OPCODE_IF);
      inst->predicated = true;
   }

   foreach_list(node, &ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)node;
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(BRW_OPCODE_ELSE);

      foreach_list(node, &ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)node;
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(BRW_OPCODE_ENDIF);
}
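
/* Putting the pieces together, a GLSL "if (c) {...} else {...}" lowers to
 * approximately:
 *
 *    <set flag from c, or a gen6 IF with the compare folded in>
 *    (+f0) if
 *       <then instructions>
 *    else
 *       <else instructions>
 *    endif
 *
 * with the ELSE block simply omitted when else_instructions is empty.
 */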

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (intel->gen < 6 && c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicated = true;
   }

   foreach_list(node, &ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)node;

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, counter, this->result);
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_WHILE);
}
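
/* For reference, a counted loop lowers to approximately:
 *
 *    mov counter, from            (if ir->from)
 *    do
 *       cmp.CMP.f0 null, counter, to
 *       (+f0) break               (exits when "counter CMP to" holds)
 *       <body instructions>
 *       add counter, counter, increment
 *    while
 *
 * where each of from/to/increment is optional and skipped when absent.
 */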

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get here.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_list(node, &sig->body) {
         ir_instruction *ir = (ir_instruction *)node;
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   if (force_uncompressed_stack > 0)
      list_inst->force_uncompressed = true;
   else if (force_sechalf_stack > 0)
      list_inst->force_sechalf = true;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}
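
/* Typical usage, for illustration: the convenience emit(opcode, dst, ...)
 * overloads used throughout this file build an fs_inst by value and pass it
 * here, along the lines of
 *
 *    fs_inst *inst = emit(fs_inst(BRW_OPCODE_ADD, dst, src0, src1));
 *
 * so every instruction ends up ralloc-allocated on mem_ctx, tagged with the
 * current annotation and IR pointer, and appended to the instruction list.
 */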

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   int reg_width = c->dispatch_width / 8;

   /* Everyone's favorite color. */
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f));

   fs_inst *write;
   write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
   write->base_mrf = 2;
   write->mlen = 4 * reg_width;
   write->eot = true;
}
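
/* With base_mrf = 2 and a SIMD8 dispatch (reg_width == 1), the moves above
 * land R=1, G=0, B=1, A=0 in m2..m5, and the FB write sends those four
 * registers (mlen == 4) as the final, EOT-marked message: solid magenta.
 */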

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}
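
/* Worked example: setup data for two attribute channels is packed per
 * register, four floats each.  With urb_setup[location] == 1 and
 * channel == 3, this returns brw_vec1_grf(1 * 2 + 3 / 2, (3 & 1) * 4),
 * i.e. brw_vec1_grf(3, 4): the upper half of the second register of that
 * attribute's setup pair.
 */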

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;

   emit(FS_OPCODE_PIXEL_X, this->pixel_x);
   emit(FS_OPCODE_PIXEL_Y, this->pixel_y);

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::vec2_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
   } else {
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
   }
   emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
   emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(FS_OPCODE_LINTERP, wpos_w,
        this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        interp_reg(FRAG_ATTRIB_WPOS, 3));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(BRW_OPCODE_ADD,
        int_pixel_x,
        fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
        fs_reg(brw_imm_v(0x10101010)));
   emit(BRW_OPCODE_ADD,
        int_pixel_y,
        fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
        fs_reg(brw_imm_v(0x11001100)));
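   /* A note on those immediates: brw_imm_v packs eight signed 4-bit
    * values, lowest nibble first.  So 0x10101010 adds {0,1,0,1,0,1,0,1}
    * to the replicated per-subspan X origins, and 0x11001100 adds
    * {0,0,1,1,0,0,1,1} to the Y origins, yielding the pixel positions
    * within each 2x2 subspan.
    */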

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
   emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);

   this->current_annotation = "compute pos.w";
   this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
      uint8_t reg = c->barycentric_coord_reg[i];
      this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
      this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
   }

   this->current_annotation = NULL;
}

void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
   int reg_width = c->dispatch_width / 8;
   fs_inst *inst;
   fs_reg color = outputs[target];
   fs_reg mrf;

   /* If there's no color data to be written, skip it. */
   if (color.file == BAD_FILE)
      return;

   color.reg_offset += index;

   if (c->dispatch_width == 8 || intel->gen >= 6) {
      /* SIMD8 write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       *
       * gen6 SIMD16 DP write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       * m + 4: b0
       * m + 5: b1
       * m + 6: a0
       * m + 7: a1
       */
      inst = emit(BRW_OPCODE_MOV,
                  fs_reg(MRF, first_color_mrf + index * reg_width, color.type),
                  color);
      inst->saturate = c->key.clamp_fragment_color;
   } else {
      /* pre-gen6 SIMD16 single source DP write looks like:
       * m + 0: r0
       * m + 1: g0
       * m + 2: b0
       * m + 3: a0
       * m + 4: r1
       * m + 5: g1
       * m + 6: b1
       * m + 7: a1
       */
      if (brw->has_compr4) {
         /* By setting the high bit of the MRF register number, we
          * indicate that we want COMPR4 mode - instead of doing the
          * usual destination + 1 for the second half we get
          * destination + 4.
          */
         inst = emit(BRW_OPCODE_MOV,
                     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
      } else {
         push_force_uncompressed();
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index,
                                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_uncompressed();

         push_force_sechalf();
         color.sechalf = true;
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4,
                                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_sechalf();
         color.sechalf = false;
      }
   }
}
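
/* Sketch of the COMPR4 trick above: for a pre-gen6 SIMD16 write of channel
 * index 0 with first_color_mrf == 2, the single compressed
 *
 *    mov m(COMPR4 | 2), color
 *
 * replaces the pair of forced-half moves
 *
 *    mov m2, color          (first half)
 *    mov m6, color          (second half)
 *
 * since COMPR4 steers the second half to destination + 4 rather than the
 * usual destination + 1.
 */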

void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   bool header_present = true;
   /* We can potentially have a message length of up to 15, so we have to set
    * base_mrf to either 0 or 1 in order to fit in m0..m15.
    */
   int base_mrf = 1;
   int nr = base_mrf;
   int reg_width = c->dispatch_width / 8;
   bool do_dual_src = this->dual_src_output.file != BAD_FILE;
   bool src0_alpha_to_render_target = false;

   if (c->dispatch_width == 16 && do_dual_src) {
      fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
      do_dual_src = false;
   }

   /* From the Sandy Bridge PRM, volume 4, page 198:
    *
    *     "Dispatched Pixel Enables. One bit per pixel indicating
    *      which pixels were originally enabled when the thread was
    *      dispatched. This field is only required for the end-of-
    *      thread message and on all dual-source messages."
    */
   if (intel->gen >= 6 &&
       !this->fp->UsesKill &&
       !do_dual_src &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      src0_alpha_to_render_target = intel->gen >= 6 &&
                                    !do_dual_src &&
                                    c->key.nr_color_regions > 1 &&
                                    c->key.sample_alpha_to_coverage;
      /* Two MRFs for the render target write header. */
      nr += 2;
   }

   if (c->aa_dest_stencil_reg) {
      push_force_uncompressed();
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
           fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
      pop_force_uncompressed();
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4 * reg_width;
   if (do_dual_src)
      nr += 4;
   if (src0_alpha_to_render_target)
      nr += reg_width;

   if (c->source_depth_to_render_target) {
      if (intel->gen == 6 && c->dispatch_width == 16) {
         /* For outputting oDepth on gen6, SIMD8 writes have to be
          * used.  This would require 8-wide moves of each half to
          * message regs, kind of like pre-gen5 SIMD16 FB writes.
          * Just bail on doing so for now.
          */
         fail("Missing support for simd16 depth writes on gen6\n");
      }

      if (c->computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
      } else {
         /* Pass through the payload depth. */
         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
              fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
      }
      nr += reg_width;
   }

   if (c->dest_depth_reg) {
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
           fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
      nr += reg_width;
   }

   if (do_dual_src) {
      fs_reg src0 = this->outputs[0];
      fs_reg src1 = this->dual_src_output;

      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write src0");
      for (int i = 0; i < 4; i++) {
         fs_inst *inst = emit(BRW_OPCODE_MOV,
                              fs_reg(MRF, color_mrf + i, src0.type),
                              src0);
         src0.reg_offset++;
         inst->saturate = c->key.clamp_fragment_color;
      }

      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write src1");
      for (int i = 0; i < 4; i++) {
         fs_inst *inst = emit(BRW_OPCODE_MOV,
                              fs_reg(MRF, color_mrf + 4 + i, src1.type),
                              src1);
         src1.reg_offset++;
         inst->saturate = c->key.clamp_fragment_color;
      }

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = 0;
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;

      c->prog_data.dual_src_blend = true;
      this->current_annotation = NULL;
      return;
   }

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      /* If src0_alpha_to_render_target is true, include source zero alpha
       * data in the RenderTargetWrite message for targets > 0.
       */
      int write_color_mrf = color_mrf;
      if (src0_alpha_to_render_target && target != 0) {
         fs_inst *inst;
         fs_reg color = outputs[0];
         color.reg_offset += 3;

         inst = emit(BRW_OPCODE_MOV,
                     fs_reg(MRF, write_color_mrf, color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         write_color_mrf = color_mrf + reg_width;
      }

      for (unsigned i = 0; i < this->output_components[target]; i++)
         emit_color_write(target, i, write_color_mrf);

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = target;
      inst->base_mrf = base_mrf;
      if (src0_alpha_to_render_target && target == 0)
         inst->mlen = nr - base_mrf - reg_width;
      else
         inst->mlen = nr - base_mrf;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even if there are no color buffers enabled, we still need to send
       * alpha out the pipeline to our null renderbuffer to support
       * alpha-testing, alpha-to-coverage, and so on.
       */
      emit_color_write(0, 3, color_mrf);

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}
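
/* Concrete example of the bookkeeping above: a gen6+ SIMD8 shader with a
 * single color region and no discard gets header_present == false, so
 * base_mrf == 1 and color_mrf == 1, the color lands in m1..m4, and the
 * lone FB write goes out with mlen == 4 and EOT set.
 */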

void
fs_visitor::resolve_ud_negate(fs_reg *reg)
{
   /* Resolve the negation here: the MOV evaluates the negate source
    * modifier, so what we hand back is a plain temporary with no
    * modifier on it for later instructions to consume.
    */
   if (reg->type != BRW_REGISTER_TYPE_UD ||
       !reg->negate)
      return;

   fs_reg temp = fs_reg(this, glsl_type::uint_type);
   emit(BRW_OPCODE_MOV, temp, *reg);
   *reg = temp;
}

void
fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
{
   if (rvalue->type != glsl_type::bool_type)
      return;

   fs_reg temp = fs_reg(this, glsl_type::bool_type);
   emit(BRW_OPCODE_AND, temp, *reg, fs_reg(1));
   *reg = temp;
}
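
/* Example of why the masking matters: two bools that are logically equal
 * may still differ in their undefined high bits, so a raw CMP between them
 * could report inequality.  ANDing each operand down to its defined low
 * bit first makes the comparison trustworthy.
 */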

fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
                       struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->fp = (struct gl_fragment_program *)
      prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
   this->prog = prog;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->mem_ctx = ralloc_context(NULL);
   this->shader = shader;
   this->failed = false;
   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   /* There's a question that appears to be left open in the spec:
    * how do implicit dst conversions interact with the CMP
    * instruction or conditional mods?  On gen6, the instruction:
    *
    *    CMP null<d> src0<f> src1<f>
    *
    * will do src1 - src0 and compare that result as if it were an
    * integer.  On gen4, it will do src1 - src0 as float, convert
    * the result to int, and compare as int.  In between, it
    * appears that it does src1 - src0 and does the compare in the
    * execution type, so the dst type doesn't matter.
    */
   if (this->intel->gen > 4)
      this->reg_null_cmp = reg_null_d;
   else
      this->reg_null_cmp = reg_null_f;

   this->frag_depth = NULL;
   memset(this->outputs, 0, sizeof(this->outputs));
   memset(this->output_components, 0, sizeof(this->output_components));
   this->first_non_payload_grf = 0;
   this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;

   this->current_annotation = NULL;
   this->base_ir = NULL;

   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;
   this->virtual_grf_def = NULL;
   this->virtual_grf_use = NULL;
   this->live_intervals_valid = false;

   this->force_uncompressed_stack = 0;
   this->force_sechalf_stack = 0;
}

fs_visitor::~fs_visitor()
{
   ralloc_free(this->mem_ctx);
   hash_table_dtor(this->variable_ht);
}