1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
guess_execution_size(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
gen6_resolve_implied_move(struct brw_compile * p,struct brw_reg * src,GLuint msg_reg_nr)62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
gen7_convert_mrf_to_grf(struct brw_compile * p,struct brw_reg * reg)85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the BSpec / ISA Reference / send - [DevIVB+]:
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct intel_context *intel = &p->brw->intel;
96 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102
/**
 * Encode \p dest into the destination fields (bits1) of \p insn, then set
 * the instruction's execution size from the destination register width.
 *
 * Covers all four destination encodings: direct vs. register-indirect
 * addressing, each in align1 and align16 access mode.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On gen7, MRF destinations are faked with high GRF numbers. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* Stride 0 is not valid for a destination; promote to 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Stride 0 is not valid for a destination; promote to 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
156
157 extern int reg_type_size[];
158
159 static void
validate_reg(struct brw_instruction * insn,struct brw_reg reg)160 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
161 {
162 int hstride_for_reg[] = {0, 1, 2, 4};
163 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
164 int width_for_reg[] = {1, 2, 4, 8, 16};
165 int execsize_for_reg[] = {1, 2, 4, 8, 16};
166 int width, hstride, vstride, execsize;
167
168 if (reg.file == BRW_IMMEDIATE_VALUE) {
169 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
170 * mean the destination has to be 128-bit aligned and the
171 * destination horiz stride has to be a word.
172 */
173 if (reg.type == BRW_REGISTER_TYPE_V) {
174 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
175 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
176 }
177
178 return;
179 }
180
181 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
182 reg.file == BRW_ARF_NULL)
183 return;
184
185 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
186 hstride = hstride_for_reg[reg.hstride];
187
188 if (reg.vstride == 0xf) {
189 vstride = -1;
190 } else {
191 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
192 vstride = vstride_for_reg[reg.vstride];
193 }
194
195 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
196 width = width_for_reg[reg.width];
197
198 assert(insn->header.execution_size >= 0 &&
199 insn->header.execution_size < Elements(execsize_for_reg));
200 execsize = execsize_for_reg[insn->header.execution_size];
201
202 /* Restrictions from 3.3.10: Register Region Restrictions. */
203 /* 3. */
204 assert(execsize >= width);
205
206 /* 4. */
207 if (execsize == width && hstride != 0) {
208 assert(vstride == -1 || vstride == width * hstride);
209 }
210
211 /* 5. */
212 if (execsize == width && hstride == 0) {
213 /* no restriction on vstride. */
214 }
215
216 /* 6. */
217 if (width == 1) {
218 assert(hstride == 0);
219 }
220
221 /* 7. */
222 if (execsize == 1 && width == 1) {
223 assert(hstride == 0);
224 assert(vstride == 0);
225 }
226
227 /* 8. */
228 if (vstride == 0 && hstride == 0) {
229 assert(width == 1);
230 }
231
232 /* 10. Check destination issues. */
233 }
234
235 void
brw_set_src0(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)236 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
237 struct brw_reg reg)
238 {
239 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
240 assert(reg.nr < 128);
241
242 gen7_convert_mrf_to_grf(p, ®);
243
244 validate_reg(insn, reg);
245
246 insn->bits1.da1.src0_reg_file = reg.file;
247 insn->bits1.da1.src0_reg_type = reg.type;
248 insn->bits2.da1.src0_abs = reg.abs;
249 insn->bits2.da1.src0_negate = reg.negate;
250 insn->bits2.da1.src0_address_mode = reg.address_mode;
251
252 if (reg.file == BRW_IMMEDIATE_VALUE) {
253 insn->bits3.ud = reg.dw1.ud;
254
255 /* Required to set some fields in src1 as well:
256 */
257 insn->bits1.da1.src1_reg_file = 0; /* arf */
258 insn->bits1.da1.src1_reg_type = reg.type;
259 }
260 else
261 {
262 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
263 if (insn->header.access_mode == BRW_ALIGN_1) {
264 insn->bits2.da1.src0_subreg_nr = reg.subnr;
265 insn->bits2.da1.src0_reg_nr = reg.nr;
266 }
267 else {
268 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
269 insn->bits2.da16.src0_reg_nr = reg.nr;
270 }
271 }
272 else {
273 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
274
275 if (insn->header.access_mode == BRW_ALIGN_1) {
276 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
277 }
278 else {
279 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
280 }
281 }
282
283 if (insn->header.access_mode == BRW_ALIGN_1) {
284 if (reg.width == BRW_WIDTH_1 &&
285 insn->header.execution_size == BRW_EXECUTE_1) {
286 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
287 insn->bits2.da1.src0_width = BRW_WIDTH_1;
288 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
289 }
290 else {
291 insn->bits2.da1.src0_horiz_stride = reg.hstride;
292 insn->bits2.da1.src0_width = reg.width;
293 insn->bits2.da1.src0_vert_stride = reg.vstride;
294 }
295 }
296 else {
297 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
298 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
299 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
300 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
301
302 /* This is an oddity of the fact we're using the same
303 * descriptions for registers in align_16 as align_1:
304 */
305 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
306 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
307 else
308 insn->bits2.da16.src0_vert_stride = reg.vstride;
309 }
310 }
311 }
312
313
/**
 * Encode \p reg as the second source operand of \p insn.
 *
 * src1 cannot come from the message register file and, unlike src0,
 * register operands support only direct addressing.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   /* On gen7, MRF sources are faked with high GRF numbers. */
   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar source (width 1, execsize 1) uses the canonical
	  * <0;1,0> region encoding.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
383
384 /**
385 * Set the Message Descriptor and Extended Message Descriptor fields
386 * for SEND messages.
387 *
388 * \note This zeroes out the Function Control bits, so it must be called
389 * \b before filling out any message-specific data. Callers can
390 * choose not to fill in irrelevant bits; they will be zero.
391 */
392 static void
brw_set_message_descriptor(struct brw_compile * p,struct brw_instruction * inst,enum brw_message_target sfid,unsigned msg_length,unsigned response_length,bool header_present,bool end_of_thread)393 brw_set_message_descriptor(struct brw_compile *p,
394 struct brw_instruction *inst,
395 enum brw_message_target sfid,
396 unsigned msg_length,
397 unsigned response_length,
398 bool header_present,
399 bool end_of_thread)
400 {
401 struct intel_context *intel = &p->brw->intel;
402
403 brw_set_src1(p, inst, brw_imm_d(0));
404
405 if (intel->gen >= 5) {
406 inst->bits3.generic_gen5.header_present = header_present;
407 inst->bits3.generic_gen5.response_length = response_length;
408 inst->bits3.generic_gen5.msg_length = msg_length;
409 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
410
411 if (intel->gen >= 6) {
412 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
413 inst->header.destreg__conditionalmod = sfid;
414 } else {
415 /* Set Extended Message Descriptor (ex_desc) */
416 inst->bits2.send_gen5.sfid = sfid;
417 inst->bits2.send_gen5.end_of_thread = end_of_thread;
418 }
419 } else {
420 inst->bits3.generic.response_length = response_length;
421 inst->bits3.generic.msg_length = msg_length;
422 inst->bits3.generic.msg_target = sfid;
423 inst->bits3.generic.end_of_thread = end_of_thread;
424 }
425 }
426
/* Fill out the message descriptor for an extended-math (SFID_MATH) send.
 * Message and response lengths are implied by the math function being
 * requested.  Saturation travels in the message, so the instruction
 * header's saturate bit is cleared afterwards.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint function,
				  GLuint integer_type,
				  bool low_precision,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length = 1;
   unsigned response_length = 1;

   /* POW and the integer-division functions take two source operands. */
   if (function == BRW_MATH_FUNCTION_POW ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER)
      msg_length = 2;

   /* SINCOS and QUOTIENT_AND_REMAINDER return two values. */
   if (function == BRW_MATH_FUNCTION_SINCOS ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER)
      response_length = 2;

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
			      msg_length, response_length, false, false);

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }

   /* Saturation was moved into the message descriptor above. */
   insn->header.saturate = 0;
}
482
483
/* Fill out the descriptor for an URB FF_SYNC message.  Message length is
 * always 1; the URB fields FF_SYNC does not consume are zeroed.  Uses the
 * gen5 URB layout — presumably only reached on gen5+ hardware; TODO
 * confirm against callers.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
499
/* Fill out the descriptor for an URB write message, using the descriptor
 * layout appropriate to the hardware generation (gen7 / gen5-6 / gen4).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes have no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
540
/**
 * Fill out the descriptor for a data-port write message.
 *
 * Picks the shared-function ID (SFID) for the target generation — gen7
 * routes render-target writes to the render cache and everything else to
 * the data cache; gen6 sends all writes through the render cache — then
 * encodes the per-generation descriptor fields.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
599
/**
 * Fill out the descriptor for a data-port read message.
 *
 * Picks the SFID per generation (gen7: data cache; gen6: render or
 * sampler cache depending on \p target_cache; earlier: the dedicated
 * read port), then encodes the per-generation descriptor fields.
 * Header is always marked present for reads.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      true, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
656
/**
 * Fill out the descriptor for a sampler (texture fetch) message, using
 * the descriptor layout for the target generation.  \p return_format is
 * only encoded on the original gen4 layout; later generations dropped
 * the field.
 */
void
brw_set_sampler_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint sampler,
			GLuint msg_type,
			GLuint response_length,
			GLuint msg_length,
			GLuint header_present,
			GLuint simd_mode,
			GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
696
697
698 #define next_insn brw_next_insn
699 struct brw_instruction *
brw_next_insn(struct brw_compile * p,GLuint opcode)700 brw_next_insn(struct brw_compile *p, GLuint opcode)
701 {
702 struct brw_instruction *insn;
703
704 if (p->nr_insn + 1 > p->store_size) {
705 if (0)
706 printf("incresing the store size to %d\n", p->store_size << 1);
707 p->store_size <<= 1;
708 p->store = reralloc(p->mem_ctx, p->store,
709 struct brw_instruction, p->store_size);
710 if (!p->store)
711 assert(!"realloc eu store memeory failed");
712 }
713
714 insn = &p->store[p->nr_insn++];
715 memcpy(insn, p->current, sizeof(*insn));
716
717 /* Reset this one-shot flag:
718 */
719
720 if (p->current->header.destreg__conditionalmod) {
721 p->current->header.destreg__conditionalmod = 0;
722 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
723 }
724
725 insn->header.opcode = opcode;
726 return insn;
727 }
728
/* Emit a one-source ALU instruction: set destination and src0. */
static struct brw_instruction *brw_alu1( struct brw_compile *p,
					 GLuint opcode,
					 struct brw_reg dest,
					 struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   return insn;
}
739
/* Emit a two-source ALU instruction: set destination, src0 and src1. */
static struct brw_instruction *brw_alu2(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   return insn;
}
752
753 static int
get_3src_subreg_nr(struct brw_reg reg)754 get_3src_subreg_nr(struct brw_reg reg)
755 {
756 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
757 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
758 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
759 } else {
760 return reg.subnr / 4;
761 }
762 }
763
/**
 * Emit a three-source ALU instruction (e.g. MAD).  Three-source
 * instructions use a dedicated encoding: align16 only, GRF/MRF
 * destination, float GRF sources with direct addressing.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   /* Fixed: this assert used '=' (assignment), so it always passed and
    * silently overwrote dest.type in debug builds.
    */
   assert(dest.type == BRW_REGISTER_TYPE_F);
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   assert(src0.type == BRW_REGISTER_TYPE_F);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl: replicate a single channel across the execution. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   assert(src1.type == BRW_REGISTER_TYPE_F);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   /* src1's subreg number is split across the bits2/bits3 boundary. */
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   assert(src2.type == BRW_REGISTER_TYPE_F);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   return insn;
}
824
825
826 /***********************************************************************
827 * Convenience routines.
828 */
/* Emitter generators for simple ALU opcodes.  ALUn(OP) expands to the
 * public brw_OP() function wrapping brw_alun() with BRW_OPCODE_OP.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
855
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 *
 * Note: destreg__conditionalmod doubles as the conditional-mod field on
 * non-send instructions, which is how BRW_CONDITIONAL_R is set below.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
880
881
/* Instantiate the simple ALU emitters from the templates above.  ADD,
 * AVG and MUL are written out by hand below because they need extra
 * operand-type assertions.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)

ROUND(RNDZ)
ROUND(RNDE)
908
909
910 struct brw_instruction *brw_ADD(struct brw_compile *p,
911 struct brw_reg dest,
912 struct brw_reg src0,
913 struct brw_reg src1)
914 {
915 /* 6.2.2: add */
916 if (src0.type == BRW_REGISTER_TYPE_F ||
917 (src0.file == BRW_IMMEDIATE_VALUE &&
918 src0.type == BRW_REGISTER_TYPE_VF)) {
919 assert(src1.type != BRW_REGISTER_TYPE_UD);
920 assert(src1.type != BRW_REGISTER_TYPE_D);
921 }
922
923 if (src1.type == BRW_REGISTER_TYPE_F ||
924 (src1.file == BRW_IMMEDIATE_VALUE &&
925 src1.type == BRW_REGISTER_TYPE_VF)) {
926 assert(src0.type != BRW_REGISTER_TYPE_UD);
927 assert(src0.type != BRW_REGISTER_TYPE_D);
928 }
929
930 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
931 }
932
brw_AVG(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)933 struct brw_instruction *brw_AVG(struct brw_compile *p,
934 struct brw_reg dest,
935 struct brw_reg src0,
936 struct brw_reg src1)
937 {
938 assert(dest.type == src0.type);
939 assert(src0.type == src1.type);
940 switch (src0.type) {
941 case BRW_REGISTER_TYPE_B:
942 case BRW_REGISTER_TYPE_UB:
943 case BRW_REGISTER_TYPE_W:
944 case BRW_REGISTER_TYPE_UW:
945 case BRW_REGISTER_TYPE_D:
946 case BRW_REGISTER_TYPE_UD:
947 break;
948 default:
949 assert(!"Bad type for brw_AVG");
950 }
951
952 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
953 }
954
brw_MUL(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)955 struct brw_instruction *brw_MUL(struct brw_compile *p,
956 struct brw_reg dest,
957 struct brw_reg src0,
958 struct brw_reg src1)
959 {
960 /* 6.32.38: mul */
961 if (src0.type == BRW_REGISTER_TYPE_D ||
962 src0.type == BRW_REGISTER_TYPE_UD ||
963 src1.type == BRW_REGISTER_TYPE_D ||
964 src1.type == BRW_REGISTER_TYPE_UD) {
965 assert(dest.type != BRW_REGISTER_TYPE_F);
966 }
967
968 if (src0.type == BRW_REGISTER_TYPE_F ||
969 (src0.file == BRW_IMMEDIATE_VALUE &&
970 src0.type == BRW_REGISTER_TYPE_VF)) {
971 assert(src1.type != BRW_REGISTER_TYPE_UD);
972 assert(src1.type != BRW_REGISTER_TYPE_D);
973 }
974
975 if (src1.type == BRW_REGISTER_TYPE_F ||
976 (src1.file == BRW_IMMEDIATE_VALUE &&
977 src1.type == BRW_REGISTER_TYPE_VF)) {
978 assert(src0.type != BRW_REGISTER_TYPE_UD);
979 assert(src0.type != BRW_REGISTER_TYPE_D);
980 }
981
982 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
983 src0.nr != BRW_ARF_ACCUMULATOR);
984 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
985 src1.nr != BRW_ARF_ACCUMULATOR);
986
987 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
988 }
989
990
brw_NOP(struct brw_compile * p)991 void brw_NOP(struct brw_compile *p)
992 {
993 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
994 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
995 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
996 brw_set_src1(p, insn, brw_imm_ud(0x0));
997 }
998
999
1000
1001
1002
1003 /***********************************************************************
1004 * Comparisons, if/else/endif
1005 */
1006
brw_JMPI(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1007 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1008 struct brw_reg dest,
1009 struct brw_reg src0,
1010 struct brw_reg src1)
1011 {
1012 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1013
1014 insn->header.execution_size = 1;
1015 insn->header.compression_control = BRW_COMPRESSION_NONE;
1016 insn->header.mask_control = BRW_MASK_DISABLE;
1017
1018 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1019
1020 return insn;
1021 }
1022
1023 static void
push_if_stack(struct brw_compile * p,struct brw_instruction * inst)1024 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1025 {
1026 p->if_stack[p->if_stack_depth] = inst - p->store;
1027
1028 p->if_stack_depth++;
1029 if (p->if_stack_array_size <= p->if_stack_depth) {
1030 p->if_stack_array_size *= 2;
1031 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1032 p->if_stack_array_size);
1033 }
1034 }
1035
1036 static struct brw_instruction *
pop_if_stack(struct brw_compile * p)1037 pop_if_stack(struct brw_compile *p)
1038 {
1039 p->if_stack_depth--;
1040 return &p->store[p->if_stack[p->if_stack_depth]];
1041 }
1042
1043 static void
push_loop_stack(struct brw_compile * p,struct brw_instruction * inst)1044 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1045 {
1046 if (p->loop_stack_array_size < p->loop_stack_depth) {
1047 p->loop_stack_array_size *= 2;
1048 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1049 p->loop_stack_array_size);
1050 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1051 p->loop_stack_array_size);
1052 }
1053
1054 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1055 p->loop_stack_depth++;
1056 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1057 }
1058
1059 static struct brw_instruction *
get_inner_do_insn(struct brw_compile * p)1060 get_inner_do_insn(struct brw_compile *p)
1061 {
1062 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1063 }
1064
1065 /* EU takes the value from the flag register and pushes it onto some
1066 * sort of a stack (presumably merging with any flag value already on
1067 * the stack). Within an if block, the flags at the top of the stack
1068 * control execution on each channel of the unit, eg. on each of the
1069 * 16 pixel values in our wm programs.
1070 *
1071 * When the matching 'else' instruction is reached (presumably by
1072 * countdown of the instruction count patched in by our ELSE/ENDIF
1073 * functions), the relevent flags are inverted.
1074 *
1075 * When the matching 'endif' instruction is reached, the flags are
1076 * popped off. If the stack is now empty, normal execution resumes.
1077 */
/**
 * Emit an IF instruction with the given execution size.  The jump
 * target fields are left zero here and filled in later by
 * patch_IF_ELSE() once the matching ELSE/ENDIF positions are known.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction: the operand layout of
    * IF changed across generations.
    */
   if (intel->gen < 6) {
      /* Gen4/5: IF operates on IP, with the jump distance in src1. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: the jump count lives in bits1.branch_gen6. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7+: JIP and UIP offsets live in bits3.break_cont. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumes the current predicate; clear it so it doesn't
    * apply to subsequent instructions.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1118
1119 /* This function is only used for gen6-style IF instructions with an
1120 * embedded comparison (conditional modifier). It is not used on gen7.
1121 */
/**
 * Emit a gen6 IF with an embedded comparison: \p conditional is the
 * condition modifier applied to \p src0 and \p src1.  The jump count is
 * left zero and patched later by patch_IF_ELSE().
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   /* Cover all channels of the dispatch: 16-wide when compressed. */
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* For IF, the CondModifier field holds the embedded comparison. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1150
1151 /**
1152 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1153 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* IP offsets are in bytes; each instruction is 16 bytes, hence
       * the "* 16" scaling of the instruction-count deltas below.
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1190
1191 /**
1192 * Patch IF and ELSE instructions with appropriate jump targets.
1193 */
/* Fill in the jump targets of \p if_inst and (if present) \p else_inst
 * now that the position of \p endif_inst is known.  Called from
 * brw_ENDIF().
 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (intel->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1280
/**
 * Emit an ELSE for the innermost open IF.  Jump targets are left zero
 * and patched later by patch_IF_ELSE() (called from brw_ENDIF()).
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* The operand layout mirrors brw_IF() for each generation. */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Pushed on top of the matching IF; brw_ENDIF() pops both. */
   push_if_stack(p, insn);
}
1313
/**
 * Close the innermost open IF/ELSE block: emit the ENDIF (or, in SPF
 * mode on gen4/5, convert the IF/ELSE to conditional ADDs instead) and
 * patch the matching IF/ELSE jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (intel->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base adress of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation operand layout for the ENDIF itself. */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1392
/**
 * Emit a BREAK out of the innermost loop.  The jump distance is left
 * zero here; pre-gen6 it is patched by brw_patch_break_cont() when the
 * loop's WHILE is emitted (gen6+ targets are set by a later pass; see
 * the brw_set_uip_jip() note above brw_patch_break_cont()).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop the mask-stack entries of all IFs nested inside this loop. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1415
gen6_CONT(struct brw_compile * p)1416 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1417 {
1418 struct brw_instruction *insn;
1419
1420 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1421 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1422 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1423 brw_set_dest(p, insn, brw_ip_reg());
1424 brw_set_src0(p, insn, brw_ip_reg());
1425 brw_set_src1(p, insn, brw_imm_d(0x0));
1426
1427 insn->header.compression_control = BRW_COMPRESSION_NONE;
1428 insn->header.execution_size = BRW_EXECUTE_8;
1429 return insn;
1430 }
1431
/**
 * Emit a pre-gen6 CONTINUE instruction.  The jump distance is left zero
 * and patched by brw_patch_break_cont() when the loop's WHILE is
 * emitted; pop_count unwinds the mask-stack entries of all IFs nested
 * inside the current loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1446
1447 /* DO/WHILE loop:
1448 *
1449 * The DO/WHILE is just an unterminated loop -- break or continue are
1450 * used for control within the loop. We have a few ways they can be
1451 * done.
1452 *
1453 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1454 * jip and no DO instruction.
1455 *
1456 * For non-uniform control flow pre-gen6, there's a DO instruction to
1457 * push the mask, and a WHILE to jump back, and BREAK to get out and
1458 * pop the mask.
1459 *
1460 * For gen6, there's no more mask stack, so no need for DO. WHILE
1461 * just points back to the first instruction of the loop.
1462 */
/**
 * Open a loop.  On gen6+ (and in SPF mode) no DO instruction exists, so
 * we just record the position of the loop's first instruction on the
 * loop stack; pre-gen6 an actual DO is emitted to push the channel
 * mask.  Returns the (possibly virtual) loop-head instruction slot.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No instruction emitted: push the index of the next slot. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1490
1491 /**
1492 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1493 * instruction here.
1494 *
1495 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1496 * nesting, since it can always just point to the end of the block/current loop.
1497 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Jump counts are in 64-bit chunks: 2 per instruction on gen5. */
   int br = (intel->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, fixing up every
    * unpatched BREAK/CONT in between.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 /* BREAK jumps past the WHILE (out of the loop). */
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 /* CONT jumps to the WHILE itself (next iteration test). */
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1520
/**
 * Close the innermost loop opened by brw_DO(): emit the WHILE (or, in
 * pre-gen6 SPF mode, an IP-adjusting ADD), point it back at the loop
 * head, and pre-gen6 patch any BREAK/CONT inside the loop.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   /* Jump counts are in 64-bit chunks from gen5 on: 2 per instruction. */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards jump to the loop head (negative distance). */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: a scalar ADD on IP replaces the WHILE; distance is in
	  * bytes (16 per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 /* Fix up BREAK/CONT jump counts now that the WHILE is placed. */
	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1584
1585
1586 /* FORWARD JUMPS:
1587 */
brw_land_fwd_jump(struct brw_compile * p,int jmp_insn_idx)1588 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1589 {
1590 struct intel_context *intel = &p->brw->intel;
1591 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1592 GLuint jmpi = 1;
1593
1594 if (intel->gen >= 5)
1595 jmpi = 2;
1596
1597 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1598 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1599
1600 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1601 }
1602
1603
1604
1605 /* To integrate with the above, it makes sense that the comparison
1606 * instruction should populate the flag register. It might be simpler
1607 * just to use the flag reg for most WM tasks?
1608 */
/**
 * Emit a CMP of \p src0 against \p src1 under \p conditional, writing
 * the (possibly null) \p dest and updating the flag register.  When the
 * destination is the null architecture register, later instructions are
 * switched to predicate on the freshly computed flag.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
1635
1636 /* Issue 'wait' instruction for n1, host could program MMIO
1637 to wake up thread. */
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* The notification register n1 is both the source and destination. */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1650
1651
1652 /***********************************************************************
1653 * Helpers for the various SEND message types:
1654 */
1655
1656 /** Extended math function, float[8].
1657 */
/** Extended math function, float[8].
 *
 * On gen6+ this is a native MATH instruction (with \p function encoded
 * in the CondModifier/ThreadCtrl bits); \p msg_reg_nr, \p data_type and
 * \p precision are only used for the pre-gen6 SEND-to-math-unit path.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* MATH only operates on GRFs with unit destination stride. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* The integer-division functions take integer sources; everything
       * else operates on floats.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1719
1720 /** Extended math function, float[8].
1721 */
/** Extended math function, float[8].
 *
 * Two-source variant for the gen6+ native MATH instruction (e.g. the
 * INT_DIV and POW functions); asserts if used on pre-gen6 hardware.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   /* MATH only operates on GRFs with unit destination stride. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* The integer-division functions take integer sources; everything
    * else operates on floats.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1772
/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * On gen6+ a single native MATH instruction covers all 16 channels.
 * Before gen6 the extended math unit is driven through message sends,
 * so the 16-wide case is split into two uncompressed SENDs, one per
 * half of the channels.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction: lower half of the channels.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        BRW_MATH_DATA_VECTOR);

   /* Second instruction: second half, writing the next destination
    * register and reading the next message register.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1840
1841
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * \param mrf      message register used to build the message header
 * \param num_regs number of GRFs of payload to write (1 or 2)
 * \param offset   byte offset into the scratch buffer
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* From gen6 on, the header's offset field is in owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen counts the header register plus the payload registers. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      /* If compression was inherited from the current state, force it
       * off for this send and widen the header view to 16 elements.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* On gen6+ the payload is taken from the MRF named by src0. */
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
1950
1951
1952 /**
1953 * Read a block of owords (half a GRF each) from the scratch buffer
1954 * using a constant index per channel.
1955 *
1956 * Offset must be aligned to oword size (16 bytes). Used for register
1957 * spilling.
1958 */
1959 void
brw_oword_block_read_scratch(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,int num_regs,GLuint offset)1960 brw_oword_block_read_scratch(struct brw_compile *p,
1961 struct brw_reg dest,
1962 struct brw_reg mrf,
1963 int num_regs,
1964 GLuint offset)
1965 {
1966 struct intel_context *intel = &p->brw->intel;
1967 uint32_t msg_control;
1968 int rlen;
1969
1970 if (intel->gen >= 6)
1971 offset /= 16;
1972
1973 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1974 dest = retype(dest, BRW_REGISTER_TYPE_UW);
1975
1976 if (num_regs == 1) {
1977 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1978 rlen = 1;
1979 } else {
1980 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1981 rlen = 2;
1982 }
1983
1984 {
1985 brw_push_insn_state(p);
1986 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1987 brw_set_mask_control(p, BRW_MASK_DISABLE);
1988
1989 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1990
1991 /* set message header global offset field (reg 0, element 2) */
1992 brw_MOV(p,
1993 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1994 mrf.nr,
1995 2), BRW_REGISTER_TYPE_UD),
1996 brw_imm_ud(offset));
1997
1998 brw_pop_insn_state(p);
1999 }
2000
2001 {
2002 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2003
2004 assert(insn->header.predicate_control == 0);
2005 insn->header.compression_control = BRW_COMPRESSION_NONE;
2006 insn->header.destreg__conditionalmod = mrf.nr;
2007
2008 brw_set_dest(p, insn, dest); /* UW? */
2009 if (intel->gen >= 6) {
2010 brw_set_src0(p, insn, mrf);
2011 } else {
2012 brw_set_src0(p, insn, brw_null_reg());
2013 }
2014
2015 brw_set_dp_read_message(p,
2016 insn,
2017 255, /* binding table index (255=stateless) */
2018 msg_control,
2019 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2020 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2021 1, /* msg_length */
2022 rlen);
2023 }
2024 }
2025
2026 /**
2027 * Read a float[4] vector from the data port Data Cache (const buffer).
2028 * Location (in buffer) should be a multiple of 16.
2029 * Used for fetching shader constants.
2030 */
brw_oword_block_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t offset,uint32_t bind_table_index)2031 void brw_oword_block_read(struct brw_compile *p,
2032 struct brw_reg dest,
2033 struct brw_reg mrf,
2034 uint32_t offset,
2035 uint32_t bind_table_index)
2036 {
2037 struct intel_context *intel = &p->brw->intel;
2038
2039 /* On newer hardware, offset is in units of owords. */
2040 if (intel->gen >= 6)
2041 offset /= 16;
2042
2043 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2044
2045 brw_push_insn_state(p);
2046 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2047 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2048 brw_set_mask_control(p, BRW_MASK_DISABLE);
2049
2050 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2051
2052 /* set message header global offset field (reg 0, element 2) */
2053 brw_MOV(p,
2054 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2055 mrf.nr,
2056 2), BRW_REGISTER_TYPE_UD),
2057 brw_imm_ud(offset));
2058
2059 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2060 insn->header.destreg__conditionalmod = mrf.nr;
2061
2062 /* cast dest to a uword[8] vector */
2063 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2064
2065 brw_set_dest(p, insn, dest);
2066 if (intel->gen >= 6) {
2067 brw_set_src0(p, insn, mrf);
2068 } else {
2069 brw_set_src0(p, insn, brw_null_reg());
2070 }
2071
2072 brw_set_dp_read_message(p,
2073 insn,
2074 bind_table_index,
2075 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2076 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2077 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2078 1, /* msg_length */
2079 1); /* response_length (1 reg, 2 owords!) */
2080
2081 brw_pop_insn_state(p);
2082 }
2083
2084 /**
2085 * Read a set of dwords from the data port Data Cache (const buffer).
2086 *
2087 * Location (in buffer) appears as UD offsets in the register after
2088 * the provided mrf header reg.
2089 */
brw_dword_scattered_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t bind_table_index)2090 void brw_dword_scattered_read(struct brw_compile *p,
2091 struct brw_reg dest,
2092 struct brw_reg mrf,
2093 uint32_t bind_table_index)
2094 {
2095 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2096
2097 brw_push_insn_state(p);
2098 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2099 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2100 brw_set_mask_control(p, BRW_MASK_DISABLE);
2101 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2102 brw_pop_insn_state(p);
2103
2104 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2105 insn->header.destreg__conditionalmod = mrf.nr;
2106
2107 /* cast dest to a uword[8] vector */
2108 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2109
2110 brw_set_dest(p, insn, dest);
2111 brw_set_src0(p, insn, brw_null_reg());
2112
2113 brw_set_dp_read_message(p,
2114 insn,
2115 bind_table_index,
2116 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
2117 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
2118 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2119 2, /* msg_length */
2120 1); /* response_length */
2121 }
2122
2123
2124
2125 /**
2126 * Read float[4] constant(s) from VS constant buffer.
2127 * For relative addressing, two float[4] constants will be read into 'dest'.
2128 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
2129 */
brw_dp_READ_4_vs(struct brw_compile * p,struct brw_reg dest,GLuint location,GLuint bind_table_index)2130 void brw_dp_READ_4_vs(struct brw_compile *p,
2131 struct brw_reg dest,
2132 GLuint location,
2133 GLuint bind_table_index)
2134 {
2135 struct intel_context *intel = &p->brw->intel;
2136 struct brw_instruction *insn;
2137 GLuint msg_reg_nr = 1;
2138
2139 if (intel->gen >= 6)
2140 location /= 16;
2141
2142 /* Setup MRF[1] with location/offset into const buffer */
2143 brw_push_insn_state(p);
2144 brw_set_access_mode(p, BRW_ALIGN_1);
2145 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2146 brw_set_mask_control(p, BRW_MASK_DISABLE);
2147 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2148 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
2149 BRW_REGISTER_TYPE_UD),
2150 brw_imm_ud(location));
2151 brw_pop_insn_state(p);
2152
2153 insn = next_insn(p, BRW_OPCODE_SEND);
2154
2155 insn->header.predicate_control = BRW_PREDICATE_NONE;
2156 insn->header.compression_control = BRW_COMPRESSION_NONE;
2157 insn->header.destreg__conditionalmod = msg_reg_nr;
2158 insn->header.mask_control = BRW_MASK_DISABLE;
2159
2160 brw_set_dest(p, insn, dest);
2161 if (intel->gen >= 6) {
2162 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
2163 } else {
2164 brw_set_src0(p, insn, brw_null_reg());
2165 }
2166
2167 brw_set_dp_read_message(p,
2168 insn,
2169 bind_table_index,
2170 0,
2171 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2172 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2173 1, /* msg_length */
2174 1); /* response_length (1 Oword) */
2175 }
2176
2177 /**
2178 * Read a float[4] constant per vertex from VS constant buffer, with
2179 * relative addressing.
2180 */
brw_dp_READ_4_vs_relative(struct brw_compile * p,struct brw_reg dest,struct brw_reg addr_reg,GLuint offset,GLuint bind_table_index)2181 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
2182 struct brw_reg dest,
2183 struct brw_reg addr_reg,
2184 GLuint offset,
2185 GLuint bind_table_index)
2186 {
2187 struct intel_context *intel = &p->brw->intel;
2188 struct brw_reg src = brw_vec8_grf(0, 0);
2189 int msg_type;
2190
2191 /* Setup MRF[1] with offset into const buffer */
2192 brw_push_insn_state(p);
2193 brw_set_access_mode(p, BRW_ALIGN_1);
2194 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2195 brw_set_mask_control(p, BRW_MASK_DISABLE);
2196 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2197
2198 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2199 * fields ignored.
2200 */
2201 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
2202 addr_reg, brw_imm_d(offset));
2203 brw_pop_insn_state(p);
2204
2205 gen6_resolve_implied_move(p, &src, 0);
2206 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2207
2208 insn->header.predicate_control = BRW_PREDICATE_NONE;
2209 insn->header.compression_control = BRW_COMPRESSION_NONE;
2210 insn->header.destreg__conditionalmod = 0;
2211 insn->header.mask_control = BRW_MASK_DISABLE;
2212
2213 brw_set_dest(p, insn, dest);
2214 brw_set_src0(p, insn, src);
2215
2216 if (intel->gen >= 6)
2217 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2218 else if (intel->gen == 5 || intel->is_g4x)
2219 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2220 else
2221 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2222
2223 brw_set_dp_read_message(p,
2224 insn,
2225 bind_table_index,
2226 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2227 msg_type,
2228 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2229 2, /* msg_length */
2230 1); /* response_length */
2231 }
2232
2233
2234
/**
 * Emit a framebuffer (render target) write message.
 *
 * \param dispatch_width  8 or 16; selects the width of the null destination
 * \param msg_reg_nr      first message register of the color payload
 * \param eot             true for the final render target write of the thread
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint msg_control,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_type;
   struct brw_reg dest;

   /* The write produces no register result; use a null destination
    * matching the dispatch width.
    */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* Gen6+ uses the SENDC opcode for render target writes. */
   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      /* Pre-gen6 carries the message register number in the instruction. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}
2290
2291
2292 /**
2293 * Texture sample instruction.
2294 * Note: the msg_type plus msg_length values determine exactly what kind
2295 * of sampling operation is performed. See volume 4, page 161 of docs.
2296 */
brw_SAMPLE(struct brw_compile * p,struct brw_reg dest,GLuint msg_reg_nr,struct brw_reg src0,GLuint binding_table_index,GLuint sampler,GLuint writemask,GLuint msg_type,GLuint response_length,GLuint msg_length,GLuint header_present,GLuint simd_mode,GLuint return_format)2297 void brw_SAMPLE(struct brw_compile *p,
2298 struct brw_reg dest,
2299 GLuint msg_reg_nr,
2300 struct brw_reg src0,
2301 GLuint binding_table_index,
2302 GLuint sampler,
2303 GLuint writemask,
2304 GLuint msg_type,
2305 GLuint response_length,
2306 GLuint msg_length,
2307 GLuint header_present,
2308 GLuint simd_mode,
2309 GLuint return_format)
2310 {
2311 struct intel_context *intel = &p->brw->intel;
2312 bool need_stall = 0;
2313
2314 if (writemask == 0) {
2315 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2316 return;
2317 }
2318
2319 /* Hardware doesn't do destination dependency checking on send
2320 * instructions properly. Add a workaround which generates the
2321 * dependency by other means. In practice it seems like this bug
2322 * only crops up for texture samples, and only where registers are
2323 * written by the send and then written again later without being
2324 * read in between. Luckily for us, we already track that
2325 * information and use it to modify the writemask for the
2326 * instruction, so that is a guide for whether a workaround is
2327 * needed.
2328 */
2329 if (writemask != WRITEMASK_XYZW) {
2330 GLuint dst_offset = 0;
2331 GLuint i, newmask = 0, len = 0;
2332
2333 for (i = 0; i < 4; i++) {
2334 if (writemask & (1<<i))
2335 break;
2336 dst_offset += 2;
2337 }
2338 for (; i < 4; i++) {
2339 if (!(writemask & (1<<i)))
2340 break;
2341 newmask |= 1<<i;
2342 len++;
2343 }
2344
2345 if (newmask != writemask) {
2346 need_stall = 1;
2347 /* printf("need stall %x %x\n", newmask , writemask); */
2348 }
2349 else {
2350 bool dispatch_16 = false;
2351
2352 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2353
2354 guess_execution_size(p, p->current, dest);
2355 if (p->current->header.execution_size == BRW_EXECUTE_16)
2356 dispatch_16 = true;
2357
2358 newmask = ~newmask & WRITEMASK_XYZW;
2359
2360 brw_push_insn_state(p);
2361
2362 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2363 brw_set_mask_control(p, BRW_MASK_DISABLE);
2364
2365 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2366 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2367 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2368
2369 brw_pop_insn_state(p);
2370
2371 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2372 dest = offset(dest, dst_offset);
2373
2374 /* For 16-wide dispatch, masked channels are skipped in the
2375 * response. For 8-wide, masked channels still take up slots,
2376 * and are just not written to.
2377 */
2378 if (dispatch_16)
2379 response_length = len * 2;
2380 }
2381 }
2382
2383 {
2384 struct brw_instruction *insn;
2385
2386 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2387
2388 insn = next_insn(p, BRW_OPCODE_SEND);
2389 insn->header.predicate_control = 0; /* XXX */
2390 insn->header.compression_control = BRW_COMPRESSION_NONE;
2391 if (intel->gen < 6)
2392 insn->header.destreg__conditionalmod = msg_reg_nr;
2393
2394 brw_set_dest(p, insn, dest);
2395 brw_set_src0(p, insn, src0);
2396 brw_set_sampler_message(p, insn,
2397 binding_table_index,
2398 sampler,
2399 msg_type,
2400 response_length,
2401 msg_length,
2402 header_present,
2403 simd_mode,
2404 return_format);
2405 }
2406
2407 if (need_stall) {
2408 struct brw_reg reg = vec8(offset(dest, response_length-1));
2409
2410 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2411 */
2412 brw_push_insn_state(p);
2413 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2414 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2415 retype(reg, BRW_REGISTER_TYPE_UD));
2416 brw_pop_insn_state(p);
2417 }
2418
2419 }
2420
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
                   bool allocate,
                   bool used,
                   GLuint msg_length,
                   GLuint response_length,
                   bool eot,
                   bool writes_complete,
                   GLuint offset,
                   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* NOTE(review): presumably places src0 in the message register
    * region on gen6+ — see gen6_resolve_implied_move.
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6, the message register number lives in the instruction. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
                       insn,
                       allocate,
                       used,
                       msg_length,
                       response_length,
                       eot,
                       writes_complete,
                       offset,
                       swizzle);
}
2476
2477 static int
brw_find_next_block_end(struct brw_compile * p,int start)2478 brw_find_next_block_end(struct brw_compile *p, int start)
2479 {
2480 int ip;
2481
2482 for (ip = start + 1; ip < p->nr_insn; ip++) {
2483 struct brw_instruction *insn = &p->store[ip];
2484
2485 switch (insn->header.opcode) {
2486 case BRW_OPCODE_ENDIF:
2487 case BRW_OPCODE_ELSE:
2488 case BRW_OPCODE_WHILE:
2489 return ip;
2490 }
2491 }
2492 assert(!"not reached");
2493 return start + 1;
2494 }
2495
2496 /* There is no DO instruction on gen6, so to find the end of the loop
2497 * we have to see if the loop is jumping back before our start
2498 * instruction.
2499 */
2500 static int
brw_find_loop_end(struct brw_compile * p,int start)2501 brw_find_loop_end(struct brw_compile *p, int start)
2502 {
2503 struct intel_context *intel = &p->brw->intel;
2504 int ip;
2505 int br = 2;
2506
2507 for (ip = start + 1; ip < p->nr_insn; ip++) {
2508 struct brw_instruction *insn = &p->store[ip];
2509
2510 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2511 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2512 : insn->bits3.break_cont.jip;
2513 if (ip + jip / br <= start)
2514 return ip;
2515 }
2516 }
2517 assert(!"not reached");
2518 return start + 1;
2519 }
2520
2521 /* After program generation, go back and update the UIP and JIP of
2522 * BREAK and CONT instructions to their correct locations.
2523 */
2524 void
brw_set_uip_jip(struct brw_compile * p)2525 brw_set_uip_jip(struct brw_compile *p)
2526 {
2527 struct intel_context *intel = &p->brw->intel;
2528 int ip;
2529 int br = 2;
2530
2531 if (intel->gen < 6)
2532 return;
2533
2534 for (ip = 0; ip < p->nr_insn; ip++) {
2535 struct brw_instruction *insn = &p->store[ip];
2536
2537 switch (insn->header.opcode) {
2538 case BRW_OPCODE_BREAK:
2539 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2540 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2541 insn->bits3.break_cont.uip =
2542 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2543 break;
2544 case BRW_OPCODE_CONTINUE:
2545 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2546 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2547
2548 assert(insn->bits3.break_cont.uip != 0);
2549 assert(insn->bits3.break_cont.jip != 0);
2550 break;
2551 }
2552 }
2553 }
2554
/* Emit an FF_SYNC message send. */
void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 GLuint msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 GLuint response_length,
                 bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *send;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   send = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dest);
   brw_set_src0(p, send, src0);
   brw_set_src1(p, send, brw_imm_d(0));

   /* Pre-gen6, the message register number lives in the instruction. */
   if (intel->gen < 6)
      send->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p, send, allocate, response_length, eot);
}
2582
2583 /**
2584 * Emit the SEND instruction necessary to generate stream output data on Gen6
2585 * (for transform feedback).
2586 *
2587 * If send_commit_msg is true, this is the last piece of stream output data
2588 * from this thread, so send the data as a committed write. According to the
2589 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2590 *
2591 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2592 * writes are complete by sending the final write as a committed write."
2593 */
2594 void
brw_svb_write(struct brw_compile * p,struct brw_reg dest,GLuint msg_reg_nr,struct brw_reg src0,GLuint binding_table_index,bool send_commit_msg)2595 brw_svb_write(struct brw_compile *p,
2596 struct brw_reg dest,
2597 GLuint msg_reg_nr,
2598 struct brw_reg src0,
2599 GLuint binding_table_index,
2600 bool send_commit_msg)
2601 {
2602 struct brw_instruction *insn;
2603
2604 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2605
2606 insn = next_insn(p, BRW_OPCODE_SEND);
2607 brw_set_dest(p, insn, dest);
2608 brw_set_src0(p, insn, src0);
2609 brw_set_src1(p, insn, brw_imm_d(0));
2610 brw_set_dp_write_message(p, insn,
2611 binding_table_index,
2612 0, /* msg_control: ignored */
2613 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2614 1, /* msg_length */
2615 true, /* header_present */
2616 0, /* last_render_target: ignored */
2617 send_commit_msg, /* response_length */
2618 0, /* end_of_thread */
2619 send_commit_msg); /* send_commit_msg */
2620 }
2621