1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31 
32 
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36 
37 #include "glsl/ralloc.h"
38 
39 /***********************************************************************
40  * Internal helper for constructing instructions
41  */
42 
guess_execution_size(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)43 static void guess_execution_size(struct brw_compile *p,
44 				 struct brw_instruction *insn,
45 				 struct brw_reg reg)
46 {
47    if (reg.width == BRW_WIDTH_8 && p->compressed)
48       insn->header.execution_size = BRW_EXECUTE_16;
49    else
50       insn->header.execution_size = reg.width;	/* note - definitions are compatible */
51 }
52 
53 
54 /**
55  * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56  * registers, implicitly moving the operand to a message register.
57  *
58  * On Sandybridge, this is no longer the case.  This function performs the
59  * explicit move; it should be called before emitting a SEND instruction.
60  */
61 void
gen6_resolve_implied_move(struct brw_compile * p,struct brw_reg * src,GLuint msg_reg_nr)62 gen6_resolve_implied_move(struct brw_compile *p,
63 			  struct brw_reg *src,
64 			  GLuint msg_reg_nr)
65 {
66    struct intel_context *intel = &p->brw->intel;
67    if (intel->gen < 6)
68       return;
69 
70    if (src->file == BRW_MESSAGE_REGISTER_FILE)
71       return;
72 
73    if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74       brw_push_insn_state(p);
75       brw_set_mask_control(p, BRW_MASK_DISABLE);
76       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77       brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 	      retype(*src, BRW_REGISTER_TYPE_UD));
79       brw_pop_insn_state(p);
80    }
81    *src = brw_message_reg(msg_reg_nr);
82 }
83 
84 static void
gen7_convert_mrf_to_grf(struct brw_compile * p,struct brw_reg * reg)85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87    /* From the BSpec / ISA Reference / send - [DevIVB+]:
88     * "The send with EOT should use register space R112-R127 for <src>. This is
89     *  to enable loading of a new thread into the same slot while the message
90     *  with EOT for current thread is pending dispatch."
91     *
92     * Since we're pretending to have 16 MRFs anyway, we may as well use the
93     * registers required for messages with EOT.
94     */
95    struct intel_context *intel = &p->brw->intel;
96    if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97       reg->file = BRW_GENERAL_REGISTER_FILE;
98       reg->nr += GEN7_MRF_HACK_START;
99    }
100 }
101 
102 
103 void
brw_set_dest(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg dest)104 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
105 	     struct brw_reg dest)
106 {
107    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
108        dest.file != BRW_MESSAGE_REGISTER_FILE)
109       assert(dest.nr < 128);
110 
111    gen7_convert_mrf_to_grf(p, &dest);
112 
113    insn->bits1.da1.dest_reg_file = dest.file;
114    insn->bits1.da1.dest_reg_type = dest.type;
115    insn->bits1.da1.dest_address_mode = dest.address_mode;
116 
117    if (dest.address_mode == BRW_ADDRESS_DIRECT) {
118       insn->bits1.da1.dest_reg_nr = dest.nr;
119 
120       if (insn->header.access_mode == BRW_ALIGN_1) {
121 	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
122 	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
123 	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
124 	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
125       }
126       else {
127 	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
128 	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
129 	 /* even ignored in da16, still need to set as '01' */
130 	 insn->bits1.da16.dest_horiz_stride = 1;
131       }
132    }
133    else {
134       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
135 
136       /* These are different sizes in align1 vs align16:
137        */
138       if (insn->header.access_mode == BRW_ALIGN_1) {
139 	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
140 	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
141 	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
142 	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
143       }
144       else {
145 	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
146 	 /* even ignored in da16, still need to set as '01' */
147 	 insn->bits1.ia16.dest_horiz_stride = 1;
148       }
149    }
150 
151    /* NEW: Set the execution size based on dest.width and
152     * insn->compression_control:
153     */
154    guess_execution_size(p, insn, dest);
155 }
156 
157 extern int reg_type_size[];
158 
159 static void
validate_reg(struct brw_instruction * insn,struct brw_reg reg)160 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
161 {
162    int hstride_for_reg[] = {0, 1, 2, 4};
163    int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
164    int width_for_reg[] = {1, 2, 4, 8, 16};
165    int execsize_for_reg[] = {1, 2, 4, 8, 16};
166    int width, hstride, vstride, execsize;
167 
168    if (reg.file == BRW_IMMEDIATE_VALUE) {
169       /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
170        * mean the destination has to be 128-bit aligned and the
171        * destination horiz stride has to be a word.
172        */
173       if (reg.type == BRW_REGISTER_TYPE_V) {
174 	 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
175 		reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
176       }
177 
178       return;
179    }
180 
181    if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
182        reg.file == BRW_ARF_NULL)
183       return;
184 
185    assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
186    hstride = hstride_for_reg[reg.hstride];
187 
188    if (reg.vstride == 0xf) {
189       vstride = -1;
190    } else {
191       assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
192       vstride = vstride_for_reg[reg.vstride];
193    }
194 
195    assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
196    width = width_for_reg[reg.width];
197 
198    assert(insn->header.execution_size >= 0 &&
199 	  insn->header.execution_size < Elements(execsize_for_reg));
200    execsize = execsize_for_reg[insn->header.execution_size];
201 
202    /* Restrictions from 3.3.10: Register Region Restrictions. */
203    /* 3. */
204    assert(execsize >= width);
205 
206    /* 4. */
207    if (execsize == width && hstride != 0) {
208       assert(vstride == -1 || vstride == width * hstride);
209    }
210 
211    /* 5. */
212    if (execsize == width && hstride == 0) {
213       /* no restriction on vstride. */
214    }
215 
216    /* 6. */
217    if (width == 1) {
218       assert(hstride == 0);
219    }
220 
221    /* 7. */
222    if (execsize == 1 && width == 1) {
223       assert(hstride == 0);
224       assert(vstride == 0);
225    }
226 
227    /* 8. */
228    if (vstride == 0 && hstride == 0) {
229       assert(width == 1);
230    }
231 
232    /* 10. Check destination issues. */
233 }
234 
235 void
brw_set_src0(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)236 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
237 	     struct brw_reg reg)
238 {
239    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
240       assert(reg.nr < 128);
241 
242    gen7_convert_mrf_to_grf(p, &reg);
243 
244    validate_reg(insn, reg);
245 
246    insn->bits1.da1.src0_reg_file = reg.file;
247    insn->bits1.da1.src0_reg_type = reg.type;
248    insn->bits2.da1.src0_abs = reg.abs;
249    insn->bits2.da1.src0_negate = reg.negate;
250    insn->bits2.da1.src0_address_mode = reg.address_mode;
251 
252    if (reg.file == BRW_IMMEDIATE_VALUE) {
253       insn->bits3.ud = reg.dw1.ud;
254 
255       /* Required to set some fields in src1 as well:
256        */
257       insn->bits1.da1.src1_reg_file = 0; /* arf */
258       insn->bits1.da1.src1_reg_type = reg.type;
259    }
260    else
261    {
262       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
263 	 if (insn->header.access_mode == BRW_ALIGN_1) {
264 	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
265 	    insn->bits2.da1.src0_reg_nr = reg.nr;
266 	 }
267 	 else {
268 	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
269 	    insn->bits2.da16.src0_reg_nr = reg.nr;
270 	 }
271       }
272       else {
273 	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
274 
275 	 if (insn->header.access_mode == BRW_ALIGN_1) {
276 	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
277 	 }
278 	 else {
279 	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
280 	 }
281       }
282 
283       if (insn->header.access_mode == BRW_ALIGN_1) {
284 	 if (reg.width == BRW_WIDTH_1 &&
285 	     insn->header.execution_size == BRW_EXECUTE_1) {
286 	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
287 	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
288 	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
289 	 }
290 	 else {
291 	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
292 	    insn->bits2.da1.src0_width = reg.width;
293 	    insn->bits2.da1.src0_vert_stride = reg.vstride;
294 	 }
295       }
296       else {
297 	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
298 	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
299 	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
300 	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
301 
302 	 /* This is an oddity of the fact we're using the same
303 	  * descriptions for registers in align_16 as align_1:
304 	  */
305 	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
306 	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
307 	 else
308 	    insn->bits2.da16.src0_vert_stride = reg.vstride;
309       }
310    }
311 }
312 
313 
brw_set_src1(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)314 void brw_set_src1(struct brw_compile *p,
315 		  struct brw_instruction *insn,
316 		  struct brw_reg reg)
317 {
318    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
319 
320    assert(reg.nr < 128);
321 
322    gen7_convert_mrf_to_grf(p, &reg);
323 
324    validate_reg(insn, reg);
325 
326    insn->bits1.da1.src1_reg_file = reg.file;
327    insn->bits1.da1.src1_reg_type = reg.type;
328    insn->bits3.da1.src1_abs = reg.abs;
329    insn->bits3.da1.src1_negate = reg.negate;
330 
331    /* Only src1 can be immediate in two-argument instructions.
332     */
333    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
334 
335    if (reg.file == BRW_IMMEDIATE_VALUE) {
336       insn->bits3.ud = reg.dw1.ud;
337    }
338    else {
339       /* This is a hardware restriction, which may or may not be lifted
340        * in the future:
341        */
342       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
343       /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
344 
345       if (insn->header.access_mode == BRW_ALIGN_1) {
346 	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
347 	 insn->bits3.da1.src1_reg_nr = reg.nr;
348       }
349       else {
350 	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
351 	 insn->bits3.da16.src1_reg_nr = reg.nr;
352       }
353 
354       if (insn->header.access_mode == BRW_ALIGN_1) {
355 	 if (reg.width == BRW_WIDTH_1 &&
356 	     insn->header.execution_size == BRW_EXECUTE_1) {
357 	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
358 	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
359 	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
360 	 }
361 	 else {
362 	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
363 	    insn->bits3.da1.src1_width = reg.width;
364 	    insn->bits3.da1.src1_vert_stride = reg.vstride;
365 	 }
366       }
367       else {
368 	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
369 	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
370 	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
371 	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
372 
373 	 /* This is an oddity of the fact we're using the same
374 	  * descriptions for registers in align_16 as align_1:
375 	  */
376 	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
377 	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
378 	 else
379 	    insn->bits3.da16.src1_vert_stride = reg.vstride;
380       }
381    }
382 }
383 
384 /**
385  * Set the Message Descriptor and Extended Message Descriptor fields
386  * for SEND messages.
387  *
388  * \note This zeroes out the Function Control bits, so it must be called
389  *       \b before filling out any message-specific data.  Callers can
390  *       choose not to fill in irrelevant bits; they will be zero.
391  */
392 static void
brw_set_message_descriptor(struct brw_compile * p,struct brw_instruction * inst,enum brw_message_target sfid,unsigned msg_length,unsigned response_length,bool header_present,bool end_of_thread)393 brw_set_message_descriptor(struct brw_compile *p,
394 			   struct brw_instruction *inst,
395 			   enum brw_message_target sfid,
396 			   unsigned msg_length,
397 			   unsigned response_length,
398 			   bool header_present,
399 			   bool end_of_thread)
400 {
401    struct intel_context *intel = &p->brw->intel;
402 
403    brw_set_src1(p, inst, brw_imm_d(0));
404 
405    if (intel->gen >= 5) {
406       inst->bits3.generic_gen5.header_present = header_present;
407       inst->bits3.generic_gen5.response_length = response_length;
408       inst->bits3.generic_gen5.msg_length = msg_length;
409       inst->bits3.generic_gen5.end_of_thread = end_of_thread;
410 
411       if (intel->gen >= 6) {
412 	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
413 	 inst->header.destreg__conditionalmod = sfid;
414       } else {
415 	 /* Set Extended Message Descriptor (ex_desc) */
416 	 inst->bits2.send_gen5.sfid = sfid;
417 	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
418       }
419    } else {
420       inst->bits3.generic.response_length = response_length;
421       inst->bits3.generic.msg_length = msg_length;
422       inst->bits3.generic.msg_target = sfid;
423       inst->bits3.generic.end_of_thread = end_of_thread;
424    }
425 }
426 
/**
 * Fill in the message descriptor of a SEND to the math unit.
 *
 * Message and response lengths are inferred from \p function.  The
 * instruction's saturate bit is copied into the math message and then
 * cleared from the instruction header.
 */
static void brw_set_math_message( struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  GLuint function,
                                  GLuint integer_type,
                                  bool low_precision,
                                  GLuint dataType )
{
   struct intel_context *intel = &p->brw->intel;

   /* These functions send a message of length 2; all others send 1. */
   const bool long_message =
      function == BRW_MATH_FUNCTION_POW ||
      function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
      function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
      function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;

   /* These functions expect a response of length 2; all others expect 1. */
   const bool long_response =
      function == BRW_MATH_FUNCTION_SINCOS ||
      function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;

   const unsigned msg_length = long_message ? 2 : 1;
   const unsigned response_length = long_response ? 2 : 1;

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
                              msg_length, response_length, false, false);

   if (intel->gen != 5) {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   } else {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   }

   /* Saturation now lives in the message, not in the instruction header. */
   insn->header.saturate = 0;
}
482 
483 
/**
 * Fill in the message descriptor of an FF_SYNC message to the URB.
 *
 * The message always has length 1 and carries a header.  Only the opcode
 * and the \p allocate flag are meaningful; the remaining URB fields are
 * written as zero.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    bool allocate,
                                    GLuint response_length,
                                    bool end_of_thread)
{
   const unsigned ff_sync_msg_length = 1;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB, ff_sync_msg_length,
                              response_length, true, end_of_thread);

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;

   /* None of the following are used by FF_SYNC. */
   insn->bits3.urb_gen5.offset = 0;
   insn->bits3.urb_gen5.swizzle_control = 0;
   insn->bits3.urb_gen5.used = 0;
   insn->bits3.urb_gen5.complete = 0;
}
499 
/**
 * Fill in the message descriptor of a URB write message.
 *
 * The generic descriptor (with a header present) is written first, then
 * the URB-specific fields in the layout matching the hardware generation:
 * gen7, gen5 and later, or the original layout.  On gen7 the \p allocate
 * and \p used flags are not encoded, and a transposing swizzle is rejected
 * by assertion.
 */
static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 bool allocate,
                                 bool used,
                                 GLuint msg_length,
                                 GLuint response_length,
                                 bool end_of_thread,
                                 bool complete,
                                 GLuint offset,
                                 GLuint swizzle_control )
{
   struct intel_context *intel = &p->brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true, end_of_thread);

   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;   /* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
      return;
   }

   if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;   /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;  /* ? */
      insn->bits3.urb_gen5.complete = complete;
      return;
   }

   insn->bits3.urb.opcode = 0;           /* ? */
   insn->bits3.urb.offset = offset;
   insn->bits3.urb.swizzle_control = swizzle_control;
   insn->bits3.urb.allocate = allocate;
   insn->bits3.urb.used = used;          /* ? */
   insn->bits3.urb.complete = complete;
}
540 
541 void
brw_set_dp_write_message(struct brw_compile * p,struct brw_instruction * insn,GLuint binding_table_index,GLuint msg_control,GLuint msg_type,GLuint msg_length,bool header_present,GLuint last_render_target,GLuint response_length,GLuint end_of_thread,GLuint send_commit_msg)542 brw_set_dp_write_message(struct brw_compile *p,
543 			 struct brw_instruction *insn,
544 			 GLuint binding_table_index,
545 			 GLuint msg_control,
546 			 GLuint msg_type,
547 			 GLuint msg_length,
548 			 bool header_present,
549 			 GLuint last_render_target,
550 			 GLuint response_length,
551 			 GLuint end_of_thread,
552 			 GLuint send_commit_msg)
553 {
554    struct brw_context *brw = p->brw;
555    struct intel_context *intel = &brw->intel;
556    unsigned sfid;
557 
558    if (intel->gen >= 7) {
559       /* Use the Render Cache for RT writes; otherwise use the Data Cache */
560       if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
561 	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
562       else
563 	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
564    } else if (intel->gen == 6) {
565       /* Use the render cache for all write messages. */
566       sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
567    } else {
568       sfid = BRW_SFID_DATAPORT_WRITE;
569    }
570 
571    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
572 			      header_present, end_of_thread);
573 
574    if (intel->gen >= 7) {
575       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
576       insn->bits3.gen7_dp.msg_control = msg_control;
577       insn->bits3.gen7_dp.last_render_target = last_render_target;
578       insn->bits3.gen7_dp.msg_type = msg_type;
579    } else if (intel->gen == 6) {
580       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
581       insn->bits3.gen6_dp.msg_control = msg_control;
582       insn->bits3.gen6_dp.last_render_target = last_render_target;
583       insn->bits3.gen6_dp.msg_type = msg_type;
584       insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
585    } else if (intel->gen == 5) {
586       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
587       insn->bits3.dp_write_gen5.msg_control = msg_control;
588       insn->bits3.dp_write_gen5.last_render_target = last_render_target;
589       insn->bits3.dp_write_gen5.msg_type = msg_type;
590       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
591    } else {
592       insn->bits3.dp_write.binding_table_index = binding_table_index;
593       insn->bits3.dp_write.msg_control = msg_control;
594       insn->bits3.dp_write.last_render_target = last_render_target;
595       insn->bits3.dp_write.msg_type = msg_type;
596       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
597    }
598 }
599 
600 void
brw_set_dp_read_message(struct brw_compile * p,struct brw_instruction * insn,GLuint binding_table_index,GLuint msg_control,GLuint msg_type,GLuint target_cache,GLuint msg_length,GLuint response_length)601 brw_set_dp_read_message(struct brw_compile *p,
602 			struct brw_instruction *insn,
603 			GLuint binding_table_index,
604 			GLuint msg_control,
605 			GLuint msg_type,
606 			GLuint target_cache,
607 			GLuint msg_length,
608 			GLuint response_length)
609 {
610    struct brw_context *brw = p->brw;
611    struct intel_context *intel = &brw->intel;
612    unsigned sfid;
613 
614    if (intel->gen >= 7) {
615       sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
616    } else if (intel->gen == 6) {
617       if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
618 	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
619       else
620 	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
621    } else {
622       sfid = BRW_SFID_DATAPORT_READ;
623    }
624 
625    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
626 			      true, false);
627 
628    if (intel->gen >= 7) {
629       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
630       insn->bits3.gen7_dp.msg_control = msg_control;
631       insn->bits3.gen7_dp.last_render_target = 0;
632       insn->bits3.gen7_dp.msg_type = msg_type;
633    } else if (intel->gen == 6) {
634       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
635       insn->bits3.gen6_dp.msg_control = msg_control;
636       insn->bits3.gen6_dp.last_render_target = 0;
637       insn->bits3.gen6_dp.msg_type = msg_type;
638       insn->bits3.gen6_dp.send_commit_msg = 0;
639    } else if (intel->gen == 5) {
640       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
641       insn->bits3.dp_read_gen5.msg_control = msg_control;
642       insn->bits3.dp_read_gen5.msg_type = msg_type;
643       insn->bits3.dp_read_gen5.target_cache = target_cache;
644    } else if (intel->is_g4x) {
645       insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
646       insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
647       insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
648       insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
649    } else {
650       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
651       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
652       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
653       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
654    }
655 }
656 
657 void
brw_set_sampler_message(struct brw_compile * p,struct brw_instruction * insn,GLuint binding_table_index,GLuint sampler,GLuint msg_type,GLuint response_length,GLuint msg_length,GLuint header_present,GLuint simd_mode,GLuint return_format)658 brw_set_sampler_message(struct brw_compile *p,
659                         struct brw_instruction *insn,
660                         GLuint binding_table_index,
661                         GLuint sampler,
662                         GLuint msg_type,
663                         GLuint response_length,
664                         GLuint msg_length,
665                         GLuint header_present,
666                         GLuint simd_mode,
667                         GLuint return_format)
668 {
669    struct brw_context *brw = p->brw;
670    struct intel_context *intel = &brw->intel;
671 
672    brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
673 			      response_length, header_present, false);
674 
675    if (intel->gen >= 7) {
676       insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
677       insn->bits3.sampler_gen7.sampler = sampler;
678       insn->bits3.sampler_gen7.msg_type = msg_type;
679       insn->bits3.sampler_gen7.simd_mode = simd_mode;
680    } else if (intel->gen >= 5) {
681       insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
682       insn->bits3.sampler_gen5.sampler = sampler;
683       insn->bits3.sampler_gen5.msg_type = msg_type;
684       insn->bits3.sampler_gen5.simd_mode = simd_mode;
685    } else if (intel->is_g4x) {
686       insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
687       insn->bits3.sampler_g4x.sampler = sampler;
688       insn->bits3.sampler_g4x.msg_type = msg_type;
689    } else {
690       insn->bits3.sampler.binding_table_index = binding_table_index;
691       insn->bits3.sampler.sampler = sampler;
692       insn->bits3.sampler.msg_type = msg_type;
693       insn->bits3.sampler.return_format = return_format;
694    }
695 }
696 
697 
698 #define next_insn brw_next_insn
699 struct brw_instruction *
brw_next_insn(struct brw_compile * p,GLuint opcode)700 brw_next_insn(struct brw_compile *p, GLuint opcode)
701 {
702    struct brw_instruction *insn;
703 
704    if (p->nr_insn + 1 > p->store_size) {
705       if (0)
706          printf("incresing the store size to %d\n", p->store_size << 1);
707       p->store_size <<= 1;
708       p->store = reralloc(p->mem_ctx, p->store,
709                           struct brw_instruction, p->store_size);
710       if (!p->store)
711          assert(!"realloc eu store memeory failed");
712    }
713 
714    insn = &p->store[p->nr_insn++];
715    memcpy(insn, p->current, sizeof(*insn));
716 
717    /* Reset this one-shot flag:
718     */
719 
720    if (p->current->header.destreg__conditionalmod) {
721       p->current->header.destreg__conditionalmod = 0;
722       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
723    }
724 
725    insn->header.opcode = opcode;
726    return insn;
727 }
728 
/**
 * Emit a one-source ALU instruction.
 *
 * Allocates the next instruction with \p opcode, then encodes \p dest as
 * its destination and \p src as source 0.
 *
 * \return the newly emitted instruction.
 */
static struct brw_instruction *
brw_alu1(struct brw_compile *p,
         GLuint opcode,
         struct brw_reg dest,
         struct brw_reg src)
{
   struct brw_instruction *inst = next_insn(p, opcode);

   /* The destination is encoded first, then the source. */
   brw_set_dest(p, inst, dest);
   brw_set_src0(p, inst, src);

   return inst;
}
739 
/**
 * Emit a two-source ALU instruction.
 *
 * Allocates the next instruction with \p opcode, then encodes \p dest as
 * its destination, \p src0 as source 0 and \p src1 as source 1, in that
 * order.
 *
 * \return the newly emitted instruction.
 */
static struct brw_instruction *
brw_alu2(struct brw_compile *p,
         GLuint opcode,
         struct brw_reg dest,
         struct brw_reg src0,
         struct brw_reg src1)
{
   struct brw_instruction *inst = next_insn(p, opcode);

   /* The destination is encoded first, then the sources in order. */
   brw_set_dest(p, inst, dest);
   brw_set_src0(p, inst, src0);
   brw_set_src1(p, inst, src1);

   return inst;
}
752 
753 static int
get_3src_subreg_nr(struct brw_reg reg)754 get_3src_subreg_nr(struct brw_reg reg)
755 {
756    if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
757       assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
758       return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
759    } else {
760       return reg.subnr / 4;
761    }
762 }
763 
/**
 * Emit a three-source ALU instruction.
 *
 * The instruction must be in align16 access mode.  The destination must be
 * a directly addressed float register in the general or message register
 * file (after the gen7 MRF-to-GRF conversion); every source must be a
 * directly addressed float register in the general register file.  All of
 * these requirements are enforced by assertion only.
 *
 * A source with a vertical stride of 0 is encoded with its replicate
 * control bit set.
 *
 * \return the newly emitted instruction.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
                                        GLuint opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1,
                                        struct brw_reg src2)
{
   struct brw_instruction *insn = next_insn(p, opcode);
   int src1_subreg_nr;

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   /* This must be a comparison: an assignment here would always pass and
    * would silently overwrite the destination type.
    */
   assert(dest.type == BRW_REGISTER_TYPE_F);
   /* The three-source destination file field is a single bit that is set
    * for the message register file.
    */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   assert(src0.type == BRW_REGISTER_TYPE_F);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   assert(src1.type == BRW_REGISTER_TYPE_F);
   /* The src1 sub-register number is split across two dwords: the low two
    * bits go in bits2 and the remainder in bits3.
    */
   src1_subreg_nr = get_3src_subreg_nr(src1);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = src1_subreg_nr & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = src1_subreg_nr >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   assert(src2.type == BRW_REGISTER_TYPE_F);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   return insn;
}
824 
825 
826 /***********************************************************************
827  * Convenience routines.
828  */
/* Defines brw_<OP>(): a one-source emitter that forwards to brw_alu1(). */
#define ALU1(OP)							\
struct brw_instruction *						\
brw_##OP(struct brw_compile *p,						\
	 struct brw_reg dest, struct brw_reg src0)			\
{									\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);			\
}
836 
/* Defines brw_<OP>(): a two-source emitter that forwards to brw_alu2(). */
#define ALU2(OP)							\
struct brw_instruction *						\
brw_##OP(struct brw_compile *p, struct brw_reg dest,			\
	 struct brw_reg src0, struct brw_reg src1)			\
{									\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);		\
}
845 
/* Defines brw_<OP>(): a three-source emitter that forwards to brw_alu3(). */
#define ALU3(OP)							\
struct brw_instruction *						\
brw_##OP(struct brw_compile *p, struct brw_reg dest,			\
	 struct brw_reg src0, struct brw_reg src1,			\
	 struct brw_reg src2)						\
{									\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);		\
}
855 
/* Rounding operations other than RNDD take two instructions before gen6.
 * The first writes a rounded value (possibly rounded the wrong way) to dest
 * and also sets a per-channel "increment bit" in the flag register.  A
 * predicated ADD of 1.0 then corrects dest in the channels that need it.
 *
 * Sandybridge and later appear to round correctly without the ADD, so only
 * the rounding instruction itself is emitted there.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd = next_insn(p, BRW_OPCODE_##OP);		      \
									      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen >= 6)						      \
      return;								      \
									      \
   /* turn on round-increments, then apply them with a predicated ADD */     \
   rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
   struct brw_instruction *fixup = brw_ADD(p, dest, dest, brw_imm_f(1.0f));  \
   fixup->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
}
880 
881 
/* Instantiate the brw_<OP>() emitters.  Each line expands to one function
 * definition; the macro chosen (ALU1/ALU2/ALU3) fixes the number of source
 * operands the generated function takes.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)

/* Rounding emitters; see the ROUND macro for the pre-gen6 fix-up ADD. */
ROUND(RNDZ)
ROUND(RNDE)
908 
909 
910 struct brw_instruction *brw_ADD(struct brw_compile *p,
911 				struct brw_reg dest,
912 				struct brw_reg src0,
913 				struct brw_reg src1)
914 {
915    /* 6.2.2: add */
916    if (src0.type == BRW_REGISTER_TYPE_F ||
917        (src0.file == BRW_IMMEDIATE_VALUE &&
918 	src0.type == BRW_REGISTER_TYPE_VF)) {
919       assert(src1.type != BRW_REGISTER_TYPE_UD);
920       assert(src1.type != BRW_REGISTER_TYPE_D);
921    }
922 
923    if (src1.type == BRW_REGISTER_TYPE_F ||
924        (src1.file == BRW_IMMEDIATE_VALUE &&
925 	src1.type == BRW_REGISTER_TYPE_VF)) {
926       assert(src0.type != BRW_REGISTER_TYPE_UD);
927       assert(src0.type != BRW_REGISTER_TYPE_D);
928    }
929 
930    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
931 }
932 
brw_AVG(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)933 struct brw_instruction *brw_AVG(struct brw_compile *p,
934                                 struct brw_reg dest,
935                                 struct brw_reg src0,
936                                 struct brw_reg src1)
937 {
938    assert(dest.type == src0.type);
939    assert(src0.type == src1.type);
940    switch (src0.type) {
941    case BRW_REGISTER_TYPE_B:
942    case BRW_REGISTER_TYPE_UB:
943    case BRW_REGISTER_TYPE_W:
944    case BRW_REGISTER_TYPE_UW:
945    case BRW_REGISTER_TYPE_D:
946    case BRW_REGISTER_TYPE_UD:
947       break;
948    default:
949       assert(!"Bad type for brw_AVG");
950    }
951 
952    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
953 }
954 
brw_MUL(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)955 struct brw_instruction *brw_MUL(struct brw_compile *p,
956 				struct brw_reg dest,
957 				struct brw_reg src0,
958 				struct brw_reg src1)
959 {
960    /* 6.32.38: mul */
961    if (src0.type == BRW_REGISTER_TYPE_D ||
962        src0.type == BRW_REGISTER_TYPE_UD ||
963        src1.type == BRW_REGISTER_TYPE_D ||
964        src1.type == BRW_REGISTER_TYPE_UD) {
965       assert(dest.type != BRW_REGISTER_TYPE_F);
966    }
967 
968    if (src0.type == BRW_REGISTER_TYPE_F ||
969        (src0.file == BRW_IMMEDIATE_VALUE &&
970 	src0.type == BRW_REGISTER_TYPE_VF)) {
971       assert(src1.type != BRW_REGISTER_TYPE_UD);
972       assert(src1.type != BRW_REGISTER_TYPE_D);
973    }
974 
975    if (src1.type == BRW_REGISTER_TYPE_F ||
976        (src1.file == BRW_IMMEDIATE_VALUE &&
977 	src1.type == BRW_REGISTER_TYPE_VF)) {
978       assert(src0.type != BRW_REGISTER_TYPE_UD);
979       assert(src0.type != BRW_REGISTER_TYPE_D);
980    }
981 
982    assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
983 	  src0.nr != BRW_ARF_ACCUMULATOR);
984    assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
985 	  src1.nr != BRW_ARF_ACCUMULATOR);
986 
987    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
988 }
989 
990 
brw_NOP(struct brw_compile * p)991 void brw_NOP(struct brw_compile *p)
992 {
993    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
994    brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
995    brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
996    brw_set_src1(p, insn, brw_imm_ud(0x0));
997 }
998 
999 
1000 
1001 
1002 
1003 /***********************************************************************
1004  * Comparisons, if/else/endif
1005  */
1006 
brw_JMPI(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1007 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1008                                  struct brw_reg dest,
1009                                  struct brw_reg src0,
1010                                  struct brw_reg src1)
1011 {
1012    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1013 
1014    insn->header.execution_size = 1;
1015    insn->header.compression_control = BRW_COMPRESSION_NONE;
1016    insn->header.mask_control = BRW_MASK_DISABLE;
1017 
1018    p->current->header.predicate_control = BRW_PREDICATE_NONE;
1019 
1020    return insn;
1021 }
1022 
1023 static void
push_if_stack(struct brw_compile * p,struct brw_instruction * inst)1024 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1025 {
1026    p->if_stack[p->if_stack_depth] = inst - p->store;
1027 
1028    p->if_stack_depth++;
1029    if (p->if_stack_array_size <= p->if_stack_depth) {
1030       p->if_stack_array_size *= 2;
1031       p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1032 			     p->if_stack_array_size);
1033    }
1034 }
1035 
1036 static struct brw_instruction *
pop_if_stack(struct brw_compile * p)1037 pop_if_stack(struct brw_compile *p)
1038 {
1039    p->if_stack_depth--;
1040    return &p->store[p->if_stack[p->if_stack_depth]];
1041 }
1042 
1043 static void
push_loop_stack(struct brw_compile * p,struct brw_instruction * inst)1044 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1045 {
1046    if (p->loop_stack_array_size < p->loop_stack_depth) {
1047       p->loop_stack_array_size *= 2;
1048       p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1049 			       p->loop_stack_array_size);
1050       p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1051 				     p->loop_stack_array_size);
1052    }
1053 
1054    p->loop_stack[p->loop_stack_depth] = inst - p->store;
1055    p->loop_stack_depth++;
1056    p->if_depth_in_loop[p->loop_stack_depth] = 0;
1057 }
1058 
1059 static struct brw_instruction *
get_inner_do_insn(struct brw_compile * p)1060 get_inner_do_insn(struct brw_compile *p)
1061 {
1062    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1063 }
1064 
1065 /* EU takes the value from the flag register and pushes it onto some
1066  * sort of a stack (presumably merging with any flag value already on
1067  * the stack).  Within an if block, the flags at the top of the stack
1068  * control execution on each channel of the unit, eg. on each of the
1069  * 16 pixel values in our wm programs.
1070  *
1071  * When the matching 'else' instruction is reached (presumably by
1072  * countdown of the instruction count patched in by our ELSE/ENDIF
1073  * functions), the relevent flags are inverted.
1074  *
1075  * When the matching 'endif' instruction is reached, the flags are
1076  * popped off.  If the stack is now empty, normal execution resumes.
1077  */
1078 struct brw_instruction *
brw_IF(struct brw_compile * p,GLuint execute_size)1079 brw_IF(struct brw_compile *p, GLuint execute_size)
1080 {
1081    struct intel_context *intel = &p->brw->intel;
1082    struct brw_instruction *insn;
1083 
1084    insn = next_insn(p, BRW_OPCODE_IF);
1085 
1086    /* Override the defaults for this instruction:
1087     */
1088    if (intel->gen < 6) {
1089       brw_set_dest(p, insn, brw_ip_reg());
1090       brw_set_src0(p, insn, brw_ip_reg());
1091       brw_set_src1(p, insn, brw_imm_d(0x0));
1092    } else if (intel->gen == 6) {
1093       brw_set_dest(p, insn, brw_imm_w(0));
1094       insn->bits1.branch_gen6.jump_count = 0;
1095       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1096       brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1097    } else {
1098       brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1099       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1100       brw_set_src1(p, insn, brw_imm_ud(0));
1101       insn->bits3.break_cont.jip = 0;
1102       insn->bits3.break_cont.uip = 0;
1103    }
1104 
1105    insn->header.execution_size = execute_size;
1106    insn->header.compression_control = BRW_COMPRESSION_NONE;
1107    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
1108    insn->header.mask_control = BRW_MASK_ENABLE;
1109    if (!p->single_program_flow)
1110       insn->header.thread_control = BRW_THREAD_SWITCH;
1111 
1112    p->current->header.predicate_control = BRW_PREDICATE_NONE;
1113 
1114    push_if_stack(p, insn);
1115    p->if_depth_in_loop[p->loop_stack_depth]++;
1116    return insn;
1117 }
1118 
1119 /* This function is only used for gen6-style IF instructions with an
1120  * embedded comparison (conditional modifier).  It is not used on gen7.
1121  */
1122 struct brw_instruction *
gen6_IF(struct brw_compile * p,uint32_t conditional,struct brw_reg src0,struct brw_reg src1)1123 gen6_IF(struct brw_compile *p, uint32_t conditional,
1124 	struct brw_reg src0, struct brw_reg src1)
1125 {
1126    struct brw_instruction *insn;
1127 
1128    insn = next_insn(p, BRW_OPCODE_IF);
1129 
1130    brw_set_dest(p, insn, brw_imm_w(0));
1131    if (p->compressed) {
1132       insn->header.execution_size = BRW_EXECUTE_16;
1133    } else {
1134       insn->header.execution_size = BRW_EXECUTE_8;
1135    }
1136    insn->bits1.branch_gen6.jump_count = 0;
1137    brw_set_src0(p, insn, src0);
1138    brw_set_src1(p, insn, src1);
1139 
1140    assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1141    assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1142    insn->header.destreg__conditionalmod = conditional;
1143 
1144    if (!p->single_program_flow)
1145       insn->header.thread_control = BRW_THREAD_SWITCH;
1146 
1147    push_if_stack(p, insn);
1148    return insn;
1149 }
1150 
1151 /**
1152  * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1153  */
1154 static void
convert_IF_ELSE_to_ADD(struct brw_compile * p,struct brw_instruction * if_inst,struct brw_instruction * else_inst)1155 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1156 		       struct brw_instruction *if_inst,
1157 		       struct brw_instruction *else_inst)
1158 {
1159    /* The next instruction (where the ENDIF would be, if it existed) */
1160    struct brw_instruction *next_inst = &p->store[p->nr_insn];
1161 
1162    assert(p->single_program_flow);
1163    assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1164    assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1165    assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1166 
1167    /* Convert IF to an ADD instruction that moves the instruction pointer
1168     * to the first instruction of the ELSE block.  If there is no ELSE
1169     * block, point to where ENDIF would be.  Reverse the predicate.
1170     *
1171     * There's no need to execute an ENDIF since we don't need to do any
1172     * stack operations, and if we're currently executing, we just want to
1173     * continue normally.
1174     */
1175    if_inst->header.opcode = BRW_OPCODE_ADD;
1176    if_inst->header.predicate_inverse = 1;
1177 
1178    if (else_inst != NULL) {
1179       /* Convert ELSE to an ADD instruction that points where the ENDIF
1180        * would be.
1181        */
1182       else_inst->header.opcode = BRW_OPCODE_ADD;
1183 
1184       if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1185       else_inst->bits3.ud = (next_inst - else_inst) * 16;
1186    } else {
1187       if_inst->bits3.ud = (next_inst - if_inst) * 16;
1188    }
1189 }
1190 
1191 /**
1192  * Patch IF and ELSE instructions with appropriate jump targets.
1193  */
1194 static void
patch_IF_ELSE(struct brw_compile * p,struct brw_instruction * if_inst,struct brw_instruction * else_inst,struct brw_instruction * endif_inst)1195 patch_IF_ELSE(struct brw_compile *p,
1196 	      struct brw_instruction *if_inst,
1197 	      struct brw_instruction *else_inst,
1198 	      struct brw_instruction *endif_inst)
1199 {
1200    struct intel_context *intel = &p->brw->intel;
1201 
1202    /* We shouldn't be patching IF and ELSE instructions in single program flow
1203     * mode when gen < 6, because in single program flow mode on those
1204     * platforms, we convert flow control instructions to conditional ADDs that
1205     * operate on IP (see brw_ENDIF).
1206     *
1207     * However, on Gen6, writing to IP doesn't work in single program flow mode
1208     * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1209     * not be updated by non-flow control instructions.").  And on later
1210     * platforms, there is no significant benefit to converting control flow
1211     * instructions to conditional ADDs.  So we do patch IF and ELSE
1212     * instructions in single program flow mode on those platforms.
1213     */
1214    if (intel->gen < 6)
1215       assert(!p->single_program_flow);
1216 
1217    assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1218    assert(endif_inst != NULL);
1219    assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1220 
1221    unsigned br = 1;
1222    /* Jump count is for 64bit data chunk each, so one 128bit instruction
1223     * requires 2 chunks.
1224     */
1225    if (intel->gen >= 5)
1226       br = 2;
1227 
1228    assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1229    endif_inst->header.execution_size = if_inst->header.execution_size;
1230 
1231    if (else_inst == NULL) {
1232       /* Patch IF -> ENDIF */
1233       if (intel->gen < 6) {
1234 	 /* Turn it into an IFF, which means no mask stack operations for
1235 	  * all-false and jumping past the ENDIF.
1236 	  */
1237 	 if_inst->header.opcode = BRW_OPCODE_IFF;
1238 	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1239 	 if_inst->bits3.if_else.pop_count = 0;
1240 	 if_inst->bits3.if_else.pad0 = 0;
1241       } else if (intel->gen == 6) {
1242 	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1243 	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1244       } else {
1245 	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1246 	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1247       }
1248    } else {
1249       else_inst->header.execution_size = if_inst->header.execution_size;
1250 
1251       /* Patch IF -> ELSE */
1252       if (intel->gen < 6) {
1253 	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1254 	 if_inst->bits3.if_else.pop_count = 0;
1255 	 if_inst->bits3.if_else.pad0 = 0;
1256       } else if (intel->gen == 6) {
1257 	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1258       }
1259 
1260       /* Patch ELSE -> ENDIF */
1261       if (intel->gen < 6) {
1262 	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1263 	  * matching ENDIF.
1264 	  */
1265 	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1266 	 else_inst->bits3.if_else.pop_count = 1;
1267 	 else_inst->bits3.if_else.pad0 = 0;
1268       } else if (intel->gen == 6) {
1269 	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1270 	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1271       } else {
1272 	 /* The IF instruction's JIP should point just past the ELSE */
1273 	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1274 	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1275 	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1276 	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1277       }
1278    }
1279 }
1280 
1281 void
brw_ELSE(struct brw_compile * p)1282 brw_ELSE(struct brw_compile *p)
1283 {
1284    struct intel_context *intel = &p->brw->intel;
1285    struct brw_instruction *insn;
1286 
1287    insn = next_insn(p, BRW_OPCODE_ELSE);
1288 
1289    if (intel->gen < 6) {
1290       brw_set_dest(p, insn, brw_ip_reg());
1291       brw_set_src0(p, insn, brw_ip_reg());
1292       brw_set_src1(p, insn, brw_imm_d(0x0));
1293    } else if (intel->gen == 6) {
1294       brw_set_dest(p, insn, brw_imm_w(0));
1295       insn->bits1.branch_gen6.jump_count = 0;
1296       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1297       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1298    } else {
1299       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1300       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1301       brw_set_src1(p, insn, brw_imm_ud(0));
1302       insn->bits3.break_cont.jip = 0;
1303       insn->bits3.break_cont.uip = 0;
1304    }
1305 
1306    insn->header.compression_control = BRW_COMPRESSION_NONE;
1307    insn->header.mask_control = BRW_MASK_ENABLE;
1308    if (!p->single_program_flow)
1309       insn->header.thread_control = BRW_THREAD_SWITCH;
1310 
1311    push_if_stack(p, insn);
1312 }
1313 
1314 void
brw_ENDIF(struct brw_compile * p)1315 brw_ENDIF(struct brw_compile *p)
1316 {
1317    struct intel_context *intel = &p->brw->intel;
1318    struct brw_instruction *insn = NULL;
1319    struct brw_instruction *else_inst = NULL;
1320    struct brw_instruction *if_inst = NULL;
1321    struct brw_instruction *tmp;
1322    bool emit_endif = true;
1323 
1324    /* In single program flow mode, we can express IF and ELSE instructions
1325     * equivalently as ADD instructions that operate on IP.  On platforms prior
1326     * to Gen6, flow control instructions cause an implied thread switch, so
1327     * this is a significant savings.
1328     *
1329     * However, on Gen6, writing to IP doesn't work in single program flow mode
1330     * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1331     * not be updated by non-flow control instructions.").  And on later
1332     * platforms, there is no significant benefit to converting control flow
1333     * instructions to conditional ADDs.  So we only do this trick on Gen4 and
1334     * Gen5.
1335     */
1336    if (intel->gen < 6 && p->single_program_flow)
1337       emit_endif = false;
1338 
1339    /*
1340     * A single next_insn() may change the base adress of instruction store
1341     * memory(p->store), so call it first before referencing the instruction
1342     * store pointer from an index
1343     */
1344    if (emit_endif)
1345       insn = next_insn(p, BRW_OPCODE_ENDIF);
1346 
1347    /* Pop the IF and (optional) ELSE instructions from the stack */
1348    p->if_depth_in_loop[p->loop_stack_depth]--;
1349    tmp = pop_if_stack(p);
1350    if (tmp->header.opcode == BRW_OPCODE_ELSE) {
1351       else_inst = tmp;
1352       tmp = pop_if_stack(p);
1353    }
1354    if_inst = tmp;
1355 
1356    if (!emit_endif) {
1357       /* ENDIF is useless; don't bother emitting it. */
1358       convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1359       return;
1360    }
1361 
1362    if (intel->gen < 6) {
1363       brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1364       brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1365       brw_set_src1(p, insn, brw_imm_d(0x0));
1366    } else if (intel->gen == 6) {
1367       brw_set_dest(p, insn, brw_imm_w(0));
1368       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1369       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1370    } else {
1371       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1372       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1373       brw_set_src1(p, insn, brw_imm_ud(0));
1374    }
1375 
1376    insn->header.compression_control = BRW_COMPRESSION_NONE;
1377    insn->header.mask_control = BRW_MASK_ENABLE;
1378    insn->header.thread_control = BRW_THREAD_SWITCH;
1379 
1380    /* Also pop item off the stack in the endif instruction: */
1381    if (intel->gen < 6) {
1382       insn->bits3.if_else.jump_count = 0;
1383       insn->bits3.if_else.pop_count = 1;
1384       insn->bits3.if_else.pad0 = 0;
1385    } else if (intel->gen == 6) {
1386       insn->bits1.branch_gen6.jump_count = 2;
1387    } else {
1388       insn->bits3.break_cont.jip = 2;
1389    }
1390    patch_IF_ELSE(p, if_inst, else_inst, insn);
1391 }
1392 
brw_BREAK(struct brw_compile * p)1393 struct brw_instruction *brw_BREAK(struct brw_compile *p)
1394 {
1395    struct intel_context *intel = &p->brw->intel;
1396    struct brw_instruction *insn;
1397 
1398    insn = next_insn(p, BRW_OPCODE_BREAK);
1399    if (intel->gen >= 6) {
1400       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1401       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1402       brw_set_src1(p, insn, brw_imm_d(0x0));
1403    } else {
1404       brw_set_dest(p, insn, brw_ip_reg());
1405       brw_set_src0(p, insn, brw_ip_reg());
1406       brw_set_src1(p, insn, brw_imm_d(0x0));
1407       insn->bits3.if_else.pad0 = 0;
1408       insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1409    }
1410    insn->header.compression_control = BRW_COMPRESSION_NONE;
1411    insn->header.execution_size = BRW_EXECUTE_8;
1412 
1413    return insn;
1414 }
1415 
gen6_CONT(struct brw_compile * p)1416 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1417 {
1418    struct brw_instruction *insn;
1419 
1420    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1421    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1422    brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1423    brw_set_dest(p, insn, brw_ip_reg());
1424    brw_set_src0(p, insn, brw_ip_reg());
1425    brw_set_src1(p, insn, brw_imm_d(0x0));
1426 
1427    insn->header.compression_control = BRW_COMPRESSION_NONE;
1428    insn->header.execution_size = BRW_EXECUTE_8;
1429    return insn;
1430 }
1431 
brw_CONT(struct brw_compile * p)1432 struct brw_instruction *brw_CONT(struct brw_compile *p)
1433 {
1434    struct brw_instruction *insn;
1435    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1436    brw_set_dest(p, insn, brw_ip_reg());
1437    brw_set_src0(p, insn, brw_ip_reg());
1438    brw_set_src1(p, insn, brw_imm_d(0x0));
1439    insn->header.compression_control = BRW_COMPRESSION_NONE;
1440    insn->header.execution_size = BRW_EXECUTE_8;
1441    /* insn->header.mask_control = BRW_MASK_DISABLE; */
1442    insn->bits3.if_else.pad0 = 0;
1443    insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1444    return insn;
1445 }
1446 
1447 /* DO/WHILE loop:
1448  *
1449  * The DO/WHILE is just an unterminated loop -- break or continue are
1450  * used for control within the loop.  We have a few ways they can be
1451  * done.
1452  *
1453  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1454  * jip and no DO instruction.
1455  *
1456  * For non-uniform control flow pre-gen6, there's a DO instruction to
1457  * push the mask, and a WHILE to jump back, and BREAK to get out and
1458  * pop the mask.
1459  *
1460  * For gen6, there's no more mask stack, so no need for DO.  WHILE
1461  * just points back to the first instruction of the loop.
1462  */
brw_DO(struct brw_compile * p,GLuint execute_size)1463 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1464 {
1465    struct intel_context *intel = &p->brw->intel;
1466 
1467    if (intel->gen >= 6 || p->single_program_flow) {
1468       push_loop_stack(p, &p->store[p->nr_insn]);
1469       return &p->store[p->nr_insn];
1470    } else {
1471       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1472 
1473       push_loop_stack(p, insn);
1474 
1475       /* Override the defaults for this instruction:
1476        */
1477       brw_set_dest(p, insn, brw_null_reg());
1478       brw_set_src0(p, insn, brw_null_reg());
1479       brw_set_src1(p, insn, brw_null_reg());
1480 
1481       insn->header.compression_control = BRW_COMPRESSION_NONE;
1482       insn->header.execution_size = execute_size;
1483       insn->header.predicate_control = BRW_PREDICATE_NONE;
1484       /* insn->header.mask_control = BRW_MASK_ENABLE; */
1485       /* insn->header.mask_control = BRW_MASK_DISABLE; */
1486 
1487       return insn;
1488    }
1489 }
1490 
1491 /**
1492  * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1493  * instruction here.
1494  *
1495  * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1496  * nesting, since it can always just point to the end of the block/current loop.
1497  */
1498 static void
brw_patch_break_cont(struct brw_compile * p,struct brw_instruction * while_inst)1499 brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
1500 {
1501    struct intel_context *intel = &p->brw->intel;
1502    struct brw_instruction *do_inst = get_inner_do_insn(p);
1503    struct brw_instruction *inst;
1504    int br = (intel->gen == 5) ? 2 : 1;
1505 
1506    for (inst = while_inst - 1; inst != do_inst; inst--) {
1507       /* If the jump count is != 0, that means that this instruction has already
1508        * been patched because it's part of a loop inside of the one we're
1509        * patching.
1510        */
1511       if (inst->header.opcode == BRW_OPCODE_BREAK &&
1512 	  inst->bits3.if_else.jump_count == 0) {
1513 	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
1514       } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
1515 		 inst->bits3.if_else.jump_count == 0) {
1516 	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
1517       }
1518    }
1519 }
1520 
brw_WHILE(struct brw_compile * p)1521 struct brw_instruction *brw_WHILE(struct brw_compile *p)
1522 {
1523    struct intel_context *intel = &p->brw->intel;
1524    struct brw_instruction *insn, *do_insn;
1525    GLuint br = 1;
1526 
1527    if (intel->gen >= 5)
1528       br = 2;
1529 
1530    if (intel->gen >= 7) {
1531       insn = next_insn(p, BRW_OPCODE_WHILE);
1532       do_insn = get_inner_do_insn(p);
1533 
1534       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1535       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1536       brw_set_src1(p, insn, brw_imm_ud(0));
1537       insn->bits3.break_cont.jip = br * (do_insn - insn);
1538 
1539       insn->header.execution_size = BRW_EXECUTE_8;
1540    } else if (intel->gen == 6) {
1541       insn = next_insn(p, BRW_OPCODE_WHILE);
1542       do_insn = get_inner_do_insn(p);
1543 
1544       brw_set_dest(p, insn, brw_imm_w(0));
1545       insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1546       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1547       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1548 
1549       insn->header.execution_size = BRW_EXECUTE_8;
1550    } else {
1551       if (p->single_program_flow) {
1552 	 insn = next_insn(p, BRW_OPCODE_ADD);
1553          do_insn = get_inner_do_insn(p);
1554 
1555 	 brw_set_dest(p, insn, brw_ip_reg());
1556 	 brw_set_src0(p, insn, brw_ip_reg());
1557 	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1558 	 insn->header.execution_size = BRW_EXECUTE_1;
1559       } else {
1560 	 insn = next_insn(p, BRW_OPCODE_WHILE);
1561          do_insn = get_inner_do_insn(p);
1562 
1563 	 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1564 
1565 	 brw_set_dest(p, insn, brw_ip_reg());
1566 	 brw_set_src0(p, insn, brw_ip_reg());
1567 	 brw_set_src1(p, insn, brw_imm_d(0));
1568 
1569 	 insn->header.execution_size = do_insn->header.execution_size;
1570 	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1571 	 insn->bits3.if_else.pop_count = 0;
1572 	 insn->bits3.if_else.pad0 = 0;
1573 
1574 	 brw_patch_break_cont(p, insn);
1575       }
1576    }
1577    insn->header.compression_control = BRW_COMPRESSION_NONE;
1578    p->current->header.predicate_control = BRW_PREDICATE_NONE;
1579 
1580    p->loop_stack_depth--;
1581 
1582    return insn;
1583 }
1584 
1585 
1586 /* FORWARD JUMPS:
1587  */
brw_land_fwd_jump(struct brw_compile * p,int jmp_insn_idx)1588 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1589 {
1590    struct intel_context *intel = &p->brw->intel;
1591    struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1592    GLuint jmpi = 1;
1593 
1594    if (intel->gen >= 5)
1595       jmpi = 2;
1596 
1597    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1598    assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1599 
1600    jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1601 }
1602 
1603 
1604 
1605 /* To integrate with the above, it makes sense that the comparison
1606  * instruction should populate the flag register.  It might be simpler
1607  * just to use the flag reg for most WM tasks?
1608  */
brw_CMP(struct brw_compile * p,struct brw_reg dest,GLuint conditional,struct brw_reg src0,struct brw_reg src1)1609 void brw_CMP(struct brw_compile *p,
1610 	     struct brw_reg dest,
1611 	     GLuint conditional,
1612 	     struct brw_reg src0,
1613 	     struct brw_reg src1)
1614 {
1615    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1616 
1617    insn->header.destreg__conditionalmod = conditional;
1618    brw_set_dest(p, insn, dest);
1619    brw_set_src0(p, insn, src0);
1620    brw_set_src1(p, insn, src1);
1621 
1622 /*    guess_execution_size(insn, src0); */
1623 
1624 
1625    /* Make it so that future instructions will use the computed flag
1626     * value until brw_set_predicate_control_flag_value() is called
1627     * again.
1628     */
1629    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1630        dest.nr == 0) {
1631       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1632       p->flag_value = 0xff;
1633    }
1634 }
1635 
1636 /* Issue 'wait' instruction for n1, host could program MMIO
1637    to wake up thread. */
brw_WAIT(struct brw_compile * p)1638 void brw_WAIT (struct brw_compile *p)
1639 {
1640    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1641    struct brw_reg src = brw_notification_1_reg();
1642 
1643    brw_set_dest(p, insn, src);
1644    brw_set_src0(p, insn, src);
1645    brw_set_src1(p, insn, brw_null_reg());
1646    insn->header.execution_size = 0; /* must */
1647    insn->header.predicate_control = 0;
1648    insn->header.compression_control = 0;
1649 }
1650 
1651 
1652 /***********************************************************************
1653  * Helpers for the various SEND message types:
1654  */
1655 
1656 /** Extended math function, float[8].
1657  */
brw_math(struct brw_compile * p,struct brw_reg dest,GLuint function,GLuint msg_reg_nr,struct brw_reg src,GLuint data_type,GLuint precision)1658 void brw_math( struct brw_compile *p,
1659 	       struct brw_reg dest,
1660 	       GLuint function,
1661 	       GLuint msg_reg_nr,
1662 	       struct brw_reg src,
1663 	       GLuint data_type,
1664 	       GLuint precision )
1665 {
1666    struct intel_context *intel = &p->brw->intel;
1667 
1668    if (intel->gen >= 6) {
1669       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1670 
1671       assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1672       assert(src.file == BRW_GENERAL_REGISTER_FILE);
1673 
1674       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1675       if (intel->gen == 6)
1676 	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1677 
1678       /* Source modifiers are ignored for extended math instructions on Gen6. */
1679       if (intel->gen == 6) {
1680 	 assert(!src.negate);
1681 	 assert(!src.abs);
1682       }
1683 
1684       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1685 	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1686 	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1687 	 assert(src.type != BRW_REGISTER_TYPE_F);
1688       } else {
1689 	 assert(src.type == BRW_REGISTER_TYPE_F);
1690       }
1691 
1692       /* Math is the same ISA format as other opcodes, except that CondModifier
1693        * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1694        */
1695       insn->header.destreg__conditionalmod = function;
1696 
1697       brw_set_dest(p, insn, dest);
1698       brw_set_src0(p, insn, src);
1699       brw_set_src1(p, insn, brw_null_reg());
1700    } else {
1701       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1702 
1703       /* Example code doesn't set predicate_control for send
1704        * instructions.
1705        */
1706       insn->header.predicate_control = 0;
1707       insn->header.destreg__conditionalmod = msg_reg_nr;
1708 
1709       brw_set_dest(p, insn, dest);
1710       brw_set_src0(p, insn, src);
1711       brw_set_math_message(p,
1712 			   insn,
1713 			   function,
1714 			   src.type == BRW_REGISTER_TYPE_D,
1715 			   precision,
1716 			   data_type);
1717    }
1718 }
1719 
1720 /** Extended math function, float[8].
1721  */
brw_math2(struct brw_compile * p,struct brw_reg dest,GLuint function,struct brw_reg src0,struct brw_reg src1)1722 void brw_math2(struct brw_compile *p,
1723 	       struct brw_reg dest,
1724 	       GLuint function,
1725 	       struct brw_reg src0,
1726 	       struct brw_reg src1)
1727 {
1728    struct intel_context *intel = &p->brw->intel;
1729    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1730 
1731    assert(intel->gen >= 6);
1732    (void) intel;
1733 
1734 
1735    assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1736    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1737    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1738 
1739    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1740    if (intel->gen == 6) {
1741       assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1742       assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1743    }
1744 
1745    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1746        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1747        function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1748       assert(src0.type != BRW_REGISTER_TYPE_F);
1749       assert(src1.type != BRW_REGISTER_TYPE_F);
1750    } else {
1751       assert(src0.type == BRW_REGISTER_TYPE_F);
1752       assert(src1.type == BRW_REGISTER_TYPE_F);
1753    }
1754 
1755    /* Source modifiers are ignored for extended math instructions on Gen6. */
1756    if (intel->gen == 6) {
1757       assert(!src0.negate);
1758       assert(!src0.abs);
1759       assert(!src1.negate);
1760       assert(!src1.abs);
1761    }
1762 
1763    /* Math is the same ISA format as other opcodes, except that CondModifier
1764     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1765     */
1766    insn->header.destreg__conditionalmod = function;
1767 
1768    brw_set_dest(p, insn, dest);
1769    brw_set_src0(p, insn, src0);
1770    brw_set_src1(p, insn, src1);
1771 }
1772 
1773 /**
1774  * Extended math function, float[16].
1775  * Use 2 send instructions.
1776  */
brw_math_16(struct brw_compile * p,struct brw_reg dest,GLuint function,GLuint msg_reg_nr,struct brw_reg src,GLuint precision)1777 void brw_math_16( struct brw_compile *p,
1778 		  struct brw_reg dest,
1779 		  GLuint function,
1780 		  GLuint msg_reg_nr,
1781 		  struct brw_reg src,
1782 		  GLuint precision )
1783 {
1784    struct intel_context *intel = &p->brw->intel;
1785    struct brw_instruction *insn;
1786 
1787    if (intel->gen >= 6) {
1788       insn = next_insn(p, BRW_OPCODE_MATH);
1789 
1790       /* Math is the same ISA format as other opcodes, except that CondModifier
1791        * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1792        */
1793       insn->header.destreg__conditionalmod = function;
1794 
1795       /* Source modifiers are ignored for extended math instructions. */
1796       assert(!src.negate);
1797       assert(!src.abs);
1798 
1799       brw_set_dest(p, insn, dest);
1800       brw_set_src0(p, insn, src);
1801       brw_set_src1(p, insn, brw_null_reg());
1802       return;
1803    }
1804 
1805    /* First instruction:
1806     */
1807    brw_push_insn_state(p);
1808    brw_set_predicate_control_flag_value(p, 0xff);
1809    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1810 
1811    insn = next_insn(p, BRW_OPCODE_SEND);
1812    insn->header.destreg__conditionalmod = msg_reg_nr;
1813 
1814    brw_set_dest(p, insn, dest);
1815    brw_set_src0(p, insn, src);
1816    brw_set_math_message(p,
1817 			insn,
1818 			function,
1819 			BRW_MATH_INTEGER_UNSIGNED,
1820 			precision,
1821 			BRW_MATH_DATA_VECTOR);
1822 
1823    /* Second instruction:
1824     */
1825    insn = next_insn(p, BRW_OPCODE_SEND);
1826    insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1827    insn->header.destreg__conditionalmod = msg_reg_nr+1;
1828 
1829    brw_set_dest(p, insn, offset(dest,1));
1830    brw_set_src0(p, insn, src);
1831    brw_set_math_message(p,
1832 			insn,
1833 			function,
1834 			BRW_MATH_INTEGER_UNSIGNED,
1835 			precision,
1836 			BRW_MATH_DATA_VECTOR);
1837 
1838    brw_pop_insn_state(p);
1839 }
1840 
1841 
1842 /**
1843  * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1844  * using a constant offset per channel.
1845  *
1846  * The offset must be aligned to oword size (16 bytes).  Used for
1847  * register spilling.
1848  */
brw_oword_block_write_scratch(struct brw_compile * p,struct brw_reg mrf,int num_regs,GLuint offset)1849 void brw_oword_block_write_scratch(struct brw_compile *p,
1850 				   struct brw_reg mrf,
1851 				   int num_regs,
1852 				   GLuint offset)
1853 {
1854    struct intel_context *intel = &p->brw->intel;
1855    uint32_t msg_control, msg_type;
1856    int mlen;
1857 
1858    if (intel->gen >= 6)
1859       offset /= 16;
1860 
1861    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1862 
1863    if (num_regs == 1) {
1864       msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1865       mlen = 2;
1866    } else {
1867       msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1868       mlen = 3;
1869    }
1870 
1871    /* Set up the message header.  This is g0, with g0.2 filled with
1872     * the offset.  We don't want to leave our offset around in g0 or
1873     * it'll screw up texture samples, so set it up inside the message
1874     * reg.
1875     */
1876    {
1877       brw_push_insn_state(p);
1878       brw_set_mask_control(p, BRW_MASK_DISABLE);
1879       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1880 
1881       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1882 
1883       /* set message header global offset field (reg 0, element 2) */
1884       brw_MOV(p,
1885 	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1886 				  mrf.nr,
1887 				  2), BRW_REGISTER_TYPE_UD),
1888 	      brw_imm_ud(offset));
1889 
1890       brw_pop_insn_state(p);
1891    }
1892 
1893    {
1894       struct brw_reg dest;
1895       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1896       int send_commit_msg;
1897       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1898 					 BRW_REGISTER_TYPE_UW);
1899 
1900       if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1901 	 insn->header.compression_control = BRW_COMPRESSION_NONE;
1902 	 src_header = vec16(src_header);
1903       }
1904       assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1905       insn->header.destreg__conditionalmod = mrf.nr;
1906 
1907       /* Until gen6, writes followed by reads from the same location
1908        * are not guaranteed to be ordered unless write_commit is set.
1909        * If set, then a no-op write is issued to the destination
1910        * register to set a dependency, and a read from the destination
1911        * can be used to ensure the ordering.
1912        *
1913        * For gen6, only writes between different threads need ordering
1914        * protection.  Our use of DP writes is all about register
1915        * spilling within a thread.
1916        */
1917       if (intel->gen >= 6) {
1918 	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1919 	 send_commit_msg = 0;
1920       } else {
1921 	 dest = src_header;
1922 	 send_commit_msg = 1;
1923       }
1924 
1925       brw_set_dest(p, insn, dest);
1926       if (intel->gen >= 6) {
1927 	 brw_set_src0(p, insn, mrf);
1928       } else {
1929 	 brw_set_src0(p, insn, brw_null_reg());
1930       }
1931 
1932       if (intel->gen >= 6)
1933 	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1934       else
1935 	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1936 
1937       brw_set_dp_write_message(p,
1938 			       insn,
1939 			       255, /* binding table index (255=stateless) */
1940 			       msg_control,
1941 			       msg_type,
1942 			       mlen,
1943 			       true, /* header_present */
1944 			       0, /* not a render target */
1945 			       send_commit_msg, /* response_length */
1946 			       0, /* eot */
1947 			       send_commit_msg);
1948    }
1949 }
1950 
1951 
1952 /**
1953  * Read a block of owords (half a GRF each) from the scratch buffer
1954  * using a constant index per channel.
1955  *
1956  * Offset must be aligned to oword size (16 bytes).  Used for register
1957  * spilling.
1958  */
1959 void
brw_oword_block_read_scratch(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,int num_regs,GLuint offset)1960 brw_oword_block_read_scratch(struct brw_compile *p,
1961 			     struct brw_reg dest,
1962 			     struct brw_reg mrf,
1963 			     int num_regs,
1964 			     GLuint offset)
1965 {
1966    struct intel_context *intel = &p->brw->intel;
1967    uint32_t msg_control;
1968    int rlen;
1969 
1970    if (intel->gen >= 6)
1971       offset /= 16;
1972 
1973    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1974    dest = retype(dest, BRW_REGISTER_TYPE_UW);
1975 
1976    if (num_regs == 1) {
1977       msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1978       rlen = 1;
1979    } else {
1980       msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1981       rlen = 2;
1982    }
1983 
1984    {
1985       brw_push_insn_state(p);
1986       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1987       brw_set_mask_control(p, BRW_MASK_DISABLE);
1988 
1989       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1990 
1991       /* set message header global offset field (reg 0, element 2) */
1992       brw_MOV(p,
1993 	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1994 				  mrf.nr,
1995 				  2), BRW_REGISTER_TYPE_UD),
1996 	      brw_imm_ud(offset));
1997 
1998       brw_pop_insn_state(p);
1999    }
2000 
2001    {
2002       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2003 
2004       assert(insn->header.predicate_control == 0);
2005       insn->header.compression_control = BRW_COMPRESSION_NONE;
2006       insn->header.destreg__conditionalmod = mrf.nr;
2007 
2008       brw_set_dest(p, insn, dest);	/* UW? */
2009       if (intel->gen >= 6) {
2010 	 brw_set_src0(p, insn, mrf);
2011       } else {
2012 	 brw_set_src0(p, insn, brw_null_reg());
2013       }
2014 
2015       brw_set_dp_read_message(p,
2016 			      insn,
2017 			      255, /* binding table index (255=stateless) */
2018 			      msg_control,
2019 			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2020 			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2021 			      1, /* msg_length */
2022 			      rlen);
2023    }
2024 }
2025 
2026 /**
2027  * Read a float[4] vector from the data port Data Cache (const buffer).
2028  * Location (in buffer) should be a multiple of 16.
2029  * Used for fetching shader constants.
2030  */
brw_oword_block_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t offset,uint32_t bind_table_index)2031 void brw_oword_block_read(struct brw_compile *p,
2032 			  struct brw_reg dest,
2033 			  struct brw_reg mrf,
2034 			  uint32_t offset,
2035 			  uint32_t bind_table_index)
2036 {
2037    struct intel_context *intel = &p->brw->intel;
2038 
2039    /* On newer hardware, offset is in units of owords. */
2040    if (intel->gen >= 6)
2041       offset /= 16;
2042 
2043    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2044 
2045    brw_push_insn_state(p);
2046    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2047    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2048    brw_set_mask_control(p, BRW_MASK_DISABLE);
2049 
2050    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2051 
2052    /* set message header global offset field (reg 0, element 2) */
2053    brw_MOV(p,
2054 	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2055 			       mrf.nr,
2056 			       2), BRW_REGISTER_TYPE_UD),
2057 	   brw_imm_ud(offset));
2058 
2059    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2060    insn->header.destreg__conditionalmod = mrf.nr;
2061 
2062    /* cast dest to a uword[8] vector */
2063    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2064 
2065    brw_set_dest(p, insn, dest);
2066    if (intel->gen >= 6) {
2067       brw_set_src0(p, insn, mrf);
2068    } else {
2069       brw_set_src0(p, insn, brw_null_reg());
2070    }
2071 
2072    brw_set_dp_read_message(p,
2073 			   insn,
2074 			   bind_table_index,
2075 			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2076 			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2077 			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2078 			   1, /* msg_length */
2079 			   1); /* response_length (1 reg, 2 owords!) */
2080 
2081    brw_pop_insn_state(p);
2082 }
2083 
2084 /**
2085  * Read a set of dwords from the data port Data Cache (const buffer).
2086  *
2087  * Location (in buffer) appears as UD offsets in the register after
2088  * the provided mrf header reg.
2089  */
brw_dword_scattered_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t bind_table_index)2090 void brw_dword_scattered_read(struct brw_compile *p,
2091 			      struct brw_reg dest,
2092 			      struct brw_reg mrf,
2093 			      uint32_t bind_table_index)
2094 {
2095    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2096 
2097    brw_push_insn_state(p);
2098    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2099    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2100    brw_set_mask_control(p, BRW_MASK_DISABLE);
2101    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2102    brw_pop_insn_state(p);
2103 
2104    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2105    insn->header.destreg__conditionalmod = mrf.nr;
2106 
2107    /* cast dest to a uword[8] vector */
2108    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2109 
2110    brw_set_dest(p, insn, dest);
2111    brw_set_src0(p, insn, brw_null_reg());
2112 
2113    brw_set_dp_read_message(p,
2114 			   insn,
2115 			   bind_table_index,
2116 			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
2117 			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
2118 			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2119 			   2, /* msg_length */
2120 			   1); /* response_length */
2121 }
2122 
2123 
2124 
2125 /**
2126  * Read float[4] constant(s) from VS constant buffer.
2127  * For relative addressing, two float[4] constants will be read into 'dest'.
2128  * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
2129  */
brw_dp_READ_4_vs(struct brw_compile * p,struct brw_reg dest,GLuint location,GLuint bind_table_index)2130 void brw_dp_READ_4_vs(struct brw_compile *p,
2131                       struct brw_reg dest,
2132                       GLuint location,
2133                       GLuint bind_table_index)
2134 {
2135    struct intel_context *intel = &p->brw->intel;
2136    struct brw_instruction *insn;
2137    GLuint msg_reg_nr = 1;
2138 
2139    if (intel->gen >= 6)
2140       location /= 16;
2141 
2142    /* Setup MRF[1] with location/offset into const buffer */
2143    brw_push_insn_state(p);
2144    brw_set_access_mode(p, BRW_ALIGN_1);
2145    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2146    brw_set_mask_control(p, BRW_MASK_DISABLE);
2147    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2148    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
2149 		     BRW_REGISTER_TYPE_UD),
2150 	   brw_imm_ud(location));
2151    brw_pop_insn_state(p);
2152 
2153    insn = next_insn(p, BRW_OPCODE_SEND);
2154 
2155    insn->header.predicate_control = BRW_PREDICATE_NONE;
2156    insn->header.compression_control = BRW_COMPRESSION_NONE;
2157    insn->header.destreg__conditionalmod = msg_reg_nr;
2158    insn->header.mask_control = BRW_MASK_DISABLE;
2159 
2160    brw_set_dest(p, insn, dest);
2161    if (intel->gen >= 6) {
2162       brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
2163    } else {
2164       brw_set_src0(p, insn, brw_null_reg());
2165    }
2166 
2167    brw_set_dp_read_message(p,
2168 			   insn,
2169 			   bind_table_index,
2170 			   0,
2171 			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2172 			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2173 			   1, /* msg_length */
2174 			   1); /* response_length (1 Oword) */
2175 }
2176 
2177 /**
2178  * Read a float[4] constant per vertex from VS constant buffer, with
2179  * relative addressing.
2180  */
brw_dp_READ_4_vs_relative(struct brw_compile * p,struct brw_reg dest,struct brw_reg addr_reg,GLuint offset,GLuint bind_table_index)2181 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
2182 			       struct brw_reg dest,
2183 			       struct brw_reg addr_reg,
2184 			       GLuint offset,
2185 			       GLuint bind_table_index)
2186 {
2187    struct intel_context *intel = &p->brw->intel;
2188    struct brw_reg src = brw_vec8_grf(0, 0);
2189    int msg_type;
2190 
2191    /* Setup MRF[1] with offset into const buffer */
2192    brw_push_insn_state(p);
2193    brw_set_access_mode(p, BRW_ALIGN_1);
2194    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2195    brw_set_mask_control(p, BRW_MASK_DISABLE);
2196    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2197 
2198    /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2199     * fields ignored.
2200     */
2201    brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
2202 	   addr_reg, brw_imm_d(offset));
2203    brw_pop_insn_state(p);
2204 
2205    gen6_resolve_implied_move(p, &src, 0);
2206    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2207 
2208    insn->header.predicate_control = BRW_PREDICATE_NONE;
2209    insn->header.compression_control = BRW_COMPRESSION_NONE;
2210    insn->header.destreg__conditionalmod = 0;
2211    insn->header.mask_control = BRW_MASK_DISABLE;
2212 
2213    brw_set_dest(p, insn, dest);
2214    brw_set_src0(p, insn, src);
2215 
2216    if (intel->gen >= 6)
2217       msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2218    else if (intel->gen == 5 || intel->is_g4x)
2219       msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2220    else
2221       msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2222 
2223    brw_set_dp_read_message(p,
2224 			   insn,
2225 			   bind_table_index,
2226 			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2227 			   msg_type,
2228 			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2229 			   2, /* msg_length */
2230 			   1); /* response_length */
2231 }
2232 
2233 
2234 
/**
 * Emit a render target write.
 *
 * On gen6 and later this is a SENDC whose source is the message register
 * msg_reg_nr (the src0 argument is ignored).  On earlier generations it
 * is a SEND using src0, with msg_reg_nr placed in the instruction header.
 * The destination is a null register, 16 wide when dispatch_width is 16
 * and 8 wide otherwise.
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint msg_control,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   const bool gen6_plus = intel->gen >= 6;
   struct brw_instruction *send;
   struct brw_reg dest;
   GLuint msg_type;

   dest = (dispatch_width == 16) ? vec16(brw_null_reg())
                                 : vec8(brw_null_reg());
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   send = next_insn(p, gen6_plus ? BRW_OPCODE_SENDC : BRW_OPCODE_SEND);

   /* The execution mask is ignored for render target writes. */
   send->header.predicate_control = 0;
   send->header.compression_control = BRW_COMPRESSION_NONE;

   if (gen6_plus) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      send->header.destreg__conditionalmod = msg_reg_nr;
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, send, dest);
   brw_set_src0(p, send, src0);
   brw_set_dp_write_message(p,
                            send,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}
2290 
2291 
2292 /**
2293  * Texture sample instruction.
2294  * Note: the msg_type plus msg_length values determine exactly what kind
2295  * of sampling operation is performed.  See volume 4, page 161 of docs.
2296  */
brw_SAMPLE(struct brw_compile * p,struct brw_reg dest,GLuint msg_reg_nr,struct brw_reg src0,GLuint binding_table_index,GLuint sampler,GLuint writemask,GLuint msg_type,GLuint response_length,GLuint msg_length,GLuint header_present,GLuint simd_mode,GLuint return_format)2297 void brw_SAMPLE(struct brw_compile *p,
2298 		struct brw_reg dest,
2299 		GLuint msg_reg_nr,
2300 		struct brw_reg src0,
2301 		GLuint binding_table_index,
2302 		GLuint sampler,
2303 		GLuint writemask,
2304 		GLuint msg_type,
2305 		GLuint response_length,
2306 		GLuint msg_length,
2307 		GLuint header_present,
2308 		GLuint simd_mode,
2309 		GLuint return_format)
2310 {
2311    struct intel_context *intel = &p->brw->intel;
2312    bool need_stall = 0;
2313 
2314    if (writemask == 0) {
2315       /*printf("%s: zero writemask??\n", __FUNCTION__); */
2316       return;
2317    }
2318 
2319    /* Hardware doesn't do destination dependency checking on send
2320     * instructions properly.  Add a workaround which generates the
2321     * dependency by other means.  In practice it seems like this bug
2322     * only crops up for texture samples, and only where registers are
2323     * written by the send and then written again later without being
2324     * read in between.  Luckily for us, we already track that
2325     * information and use it to modify the writemask for the
2326     * instruction, so that is a guide for whether a workaround is
2327     * needed.
2328     */
2329    if (writemask != WRITEMASK_XYZW) {
2330       GLuint dst_offset = 0;
2331       GLuint i, newmask = 0, len = 0;
2332 
2333       for (i = 0; i < 4; i++) {
2334 	 if (writemask & (1<<i))
2335 	    break;
2336 	 dst_offset += 2;
2337       }
2338       for (; i < 4; i++) {
2339 	 if (!(writemask & (1<<i)))
2340 	    break;
2341 	 newmask |= 1<<i;
2342 	 len++;
2343       }
2344 
2345       if (newmask != writemask) {
2346 	 need_stall = 1;
2347          /* printf("need stall %x %x\n", newmask , writemask); */
2348       }
2349       else {
2350 	 bool dispatch_16 = false;
2351 
2352 	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2353 
2354 	 guess_execution_size(p, p->current, dest);
2355 	 if (p->current->header.execution_size == BRW_EXECUTE_16)
2356 	    dispatch_16 = true;
2357 
2358 	 newmask = ~newmask & WRITEMASK_XYZW;
2359 
2360 	 brw_push_insn_state(p);
2361 
2362 	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2363 	 brw_set_mask_control(p, BRW_MASK_DISABLE);
2364 
2365 	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2366 		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2367   	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2368 
2369 	 brw_pop_insn_state(p);
2370 
2371   	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2372 	 dest = offset(dest, dst_offset);
2373 
2374 	 /* For 16-wide dispatch, masked channels are skipped in the
2375 	  * response.  For 8-wide, masked channels still take up slots,
2376 	  * and are just not written to.
2377 	  */
2378 	 if (dispatch_16)
2379 	    response_length = len * 2;
2380       }
2381    }
2382 
2383    {
2384       struct brw_instruction *insn;
2385 
2386       gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2387 
2388       insn = next_insn(p, BRW_OPCODE_SEND);
2389       insn->header.predicate_control = 0; /* XXX */
2390       insn->header.compression_control = BRW_COMPRESSION_NONE;
2391       if (intel->gen < 6)
2392 	  insn->header.destreg__conditionalmod = msg_reg_nr;
2393 
2394       brw_set_dest(p, insn, dest);
2395       brw_set_src0(p, insn, src0);
2396       brw_set_sampler_message(p, insn,
2397 			      binding_table_index,
2398 			      sampler,
2399 			      msg_type,
2400 			      response_length,
2401 			      msg_length,
2402 			      header_present,
2403 			      simd_mode,
2404 			      return_format);
2405    }
2406 
2407    if (need_stall) {
2408       struct brw_reg reg = vec8(offset(dest, response_length-1));
2409 
2410       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
2411        */
2412       brw_push_insn_state(p);
2413       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2414       brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2415 	      retype(reg, BRW_REGISTER_TYPE_UD));
2416       brw_pop_insn_state(p);
2417    }
2418 
2419 }
2420 
2421 /* All these variables are pretty confusing - we might be better off
2422  * using bitmasks and macros for this, in the old style.  Or perhaps
2423  * just having the caller instantiate the fields in dword3 itself.
2424  */
brw_urb_WRITE(struct brw_compile * p,struct brw_reg dest,GLuint msg_reg_nr,struct brw_reg src0,bool allocate,bool used,GLuint msg_length,GLuint response_length,bool eot,bool writes_complete,GLuint offset,GLuint swizzle)2425 void brw_urb_WRITE(struct brw_compile *p,
2426 		   struct brw_reg dest,
2427 		   GLuint msg_reg_nr,
2428 		   struct brw_reg src0,
2429 		   bool allocate,
2430 		   bool used,
2431 		   GLuint msg_length,
2432 		   GLuint response_length,
2433 		   bool eot,
2434 		   bool writes_complete,
2435 		   GLuint offset,
2436 		   GLuint swizzle)
2437 {
2438    struct intel_context *intel = &p->brw->intel;
2439    struct brw_instruction *insn;
2440 
2441    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2442 
2443    if (intel->gen == 7) {
2444       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2445       brw_push_insn_state(p);
2446       brw_set_access_mode(p, BRW_ALIGN_1);
2447       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2448 		       BRW_REGISTER_TYPE_UD),
2449 	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2450 		brw_imm_ud(0xff00));
2451       brw_pop_insn_state(p);
2452    }
2453 
2454    insn = next_insn(p, BRW_OPCODE_SEND);
2455 
2456    assert(msg_length < BRW_MAX_MRF);
2457 
2458    brw_set_dest(p, insn, dest);
2459    brw_set_src0(p, insn, src0);
2460    brw_set_src1(p, insn, brw_imm_d(0));
2461 
2462    if (intel->gen < 6)
2463       insn->header.destreg__conditionalmod = msg_reg_nr;
2464 
2465    brw_set_urb_message(p,
2466 		       insn,
2467 		       allocate,
2468 		       used,
2469 		       msg_length,
2470 		       response_length,
2471 		       eot,
2472 		       writes_complete,
2473 		       offset,
2474 		       swizzle);
2475 }
2476 
2477 static int
brw_find_next_block_end(struct brw_compile * p,int start)2478 brw_find_next_block_end(struct brw_compile *p, int start)
2479 {
2480    int ip;
2481 
2482    for (ip = start + 1; ip < p->nr_insn; ip++) {
2483       struct brw_instruction *insn = &p->store[ip];
2484 
2485       switch (insn->header.opcode) {
2486       case BRW_OPCODE_ENDIF:
2487       case BRW_OPCODE_ELSE:
2488       case BRW_OPCODE_WHILE:
2489 	 return ip;
2490       }
2491    }
2492    assert(!"not reached");
2493    return start + 1;
2494 }
2495 
2496 /* There is no DO instruction on gen6, so to find the end of the loop
2497  * we have to see if the loop is jumping back before our start
2498  * instruction.
2499  */
2500 static int
brw_find_loop_end(struct brw_compile * p,int start)2501 brw_find_loop_end(struct brw_compile *p, int start)
2502 {
2503    struct intel_context *intel = &p->brw->intel;
2504    int ip;
2505    int br = 2;
2506 
2507    for (ip = start + 1; ip < p->nr_insn; ip++) {
2508       struct brw_instruction *insn = &p->store[ip];
2509 
2510       if (insn->header.opcode == BRW_OPCODE_WHILE) {
2511 	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2512 				   : insn->bits3.break_cont.jip;
2513 	 if (ip + jip / br <= start)
2514 	    return ip;
2515       }
2516    }
2517    assert(!"not reached");
2518    return start + 1;
2519 }
2520 
2521 /* After program generation, go back and update the UIP and JIP of
2522  * BREAK and CONT instructions to their correct locations.
2523  */
2524 void
brw_set_uip_jip(struct brw_compile * p)2525 brw_set_uip_jip(struct brw_compile *p)
2526 {
2527    struct intel_context *intel = &p->brw->intel;
2528    int ip;
2529    int br = 2;
2530 
2531    if (intel->gen < 6)
2532       return;
2533 
2534    for (ip = 0; ip < p->nr_insn; ip++) {
2535       struct brw_instruction *insn = &p->store[ip];
2536 
2537       switch (insn->header.opcode) {
2538       case BRW_OPCODE_BREAK:
2539 	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2540 	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2541 	 insn->bits3.break_cont.uip =
2542 	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2543 	 break;
2544       case BRW_OPCODE_CONTINUE:
2545 	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2546 	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2547 
2548 	 assert(insn->bits3.break_cont.uip != 0);
2549 	 assert(insn->bits3.break_cont.jip != 0);
2550 	 break;
2551       }
2552    }
2553 }
2554 
/**
 * Emit a SEND instruction and fill in its message descriptor through
 * brw_set_ff_sync_message().
 *
 * src0 and msg_reg_nr are first handed to gen6_resolve_implied_move()
 * (src0 by address, so it may be rewritten before being used as the
 * SEND's source).  On hardware older than gen6 the message register
 * number is stored in the instruction header.
 */
void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 GLuint msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 GLuint response_length,
                 bool eot)
{
   struct brw_instruction *send;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   send = next_insn(p, BRW_OPCODE_SEND);

   /* Operands: destination, payload source, and a zero immediate src1. */
   brw_set_dest(p, send, dest);
   brw_set_src0(p, send, src0);
   brw_set_src1(p, send, brw_imm_d(0));

   if (p->brw->intel.gen < 6) {
      send->header.destreg__conditionalmod = msg_reg_nr;
   }

   brw_set_ff_sync_message(p, send, allocate, response_length, eot);
}
2582 
2583 /**
2584  * Emit the SEND instruction necessary to generate stream output data on Gen6
2585  * (for transform feedback).
2586  *
2587  * If send_commit_msg is true, this is the last piece of stream output data
2588  * from this thread, so send the data as a committed write.  According to the
2589  * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2590  *
2591  *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2592  *   writes are complete by sending the final write as a committed write."
2593  */
2594 void
brw_svb_write(struct brw_compile * p,struct brw_reg dest,GLuint msg_reg_nr,struct brw_reg src0,GLuint binding_table_index,bool send_commit_msg)2595 brw_svb_write(struct brw_compile *p,
2596               struct brw_reg dest,
2597               GLuint msg_reg_nr,
2598               struct brw_reg src0,
2599               GLuint binding_table_index,
2600               bool   send_commit_msg)
2601 {
2602    struct brw_instruction *insn;
2603 
2604    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2605 
2606    insn = next_insn(p, BRW_OPCODE_SEND);
2607    brw_set_dest(p, insn, dest);
2608    brw_set_src0(p, insn, src0);
2609    brw_set_src1(p, insn, brw_imm_d(0));
2610    brw_set_dp_write_message(p, insn,
2611                             binding_table_index,
2612                             0, /* msg_control: ignored */
2613                             GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2614                             1, /* msg_length */
2615                             true, /* header_present */
2616                             0, /* last_render_target: ignored */
2617                             send_commit_msg, /* response_length */
2618                             0, /* end_of_thread */
2619                             send_commit_msg); /* send_commit_msg */
2620 }
2621