1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31 
32 
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 
37 #include "intel_batchbuffer.h"
38 
39 #include "brw_defines.h"
40 #include "brw_context.h"
41 #include "brw_eu.h"
42 #include "brw_util.h"
43 #include "brw_sf.h"
44 
45 
46 /**
47  * Determine the vert_result corresponding to the given half of the given
48  * register.  half=0 means the first half of a register, half=1 means the
49  * second half.
50  */
vert_reg_to_vert_result(struct brw_sf_compile * c,GLuint reg,int half)51 static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, GLuint reg,
52                                           int half)
53 {
54    int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
55    return c->vue_map.slot_to_vert_result[vue_slot];
56 }
57 
58 /**
59  * Determine the register corresponding to the given vert_result.
60  */
get_vert_result(struct brw_sf_compile * c,struct brw_reg vert,GLuint vert_result)61 static struct brw_reg get_vert_result(struct brw_sf_compile *c,
62                                       struct brw_reg vert,
63                                       GLuint vert_result)
64 {
65    int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
66    assert (vue_slot >= c->urb_entry_read_offset);
67    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
68    GLuint sub = vue_slot % 2;
69 
70    return brw_vec4_grf(vert.nr + off, sub * 4);
71 }
72 
73 static bool
have_attr(struct brw_sf_compile * c,GLuint attr)74 have_attr(struct brw_sf_compile *c, GLuint attr)
75 {
76    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
77 }
78 
79 /***********************************************************************
80  * Twoside lighting
81  */
copy_bfc(struct brw_sf_compile * c,struct brw_reg vert)82 static void copy_bfc( struct brw_sf_compile *c,
83 		      struct brw_reg vert )
84 {
85    struct brw_compile *p = &c->func;
86    GLuint i;
87 
88    for (i = 0; i < 2; i++) {
89       if (have_attr(c, VERT_RESULT_COL0+i) &&
90 	  have_attr(c, VERT_RESULT_BFC0+i))
91 	 brw_MOV(p,
92 		 get_vert_result(c, vert, VERT_RESULT_COL0+i),
93 		 get_vert_result(c, vert, VERT_RESULT_BFC0+i));
94    }
95 }
96 
97 
do_twoside_color(struct brw_sf_compile * c)98 static void do_twoside_color( struct brw_sf_compile *c )
99 {
100    struct brw_compile *p = &c->func;
101    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
102 
103    /* Already done in clip program:
104     */
105    if (c->key.primitive == SF_UNFILLED_TRIS)
106       return;
107 
108    /* XXX: What happens if BFC isn't present?  This could only happen
109     * for user-supplied vertex programs, as t_vp_build.c always does
110     * the right thing.
111     */
112    if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
113        !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
114       return;
115 
116    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
117     * to get all channels active inside the IF.  In the clipping code
118     * we run with NoMask, so it's not an option and we can use
119     * BRW_EXECUTE_1 for all comparisions.
120     */
121    brw_push_insn_state(p);
122    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
123    brw_IF(p, BRW_EXECUTE_4);
124    {
125       switch (c->nr_verts) {
126       case 3: copy_bfc(c, c->vert[2]);
127       case 2: copy_bfc(c, c->vert[1]);
128       case 1: copy_bfc(c, c->vert[0]);
129       }
130    }
131    brw_ENDIF(p);
132    brw_pop_insn_state(p);
133 }
134 
135 
136 
137 /***********************************************************************
138  * Flat shading
139  */
140 
/* Bitmask selecting the COL0 and COL1 attributes. */
#define VERT_RESULT_COLOR_BITS \
   (BITFIELD64_BIT(VERT_RESULT_COL0) | BITFIELD64_BIT(VERT_RESULT_COL1))
143 
copy_colors(struct brw_sf_compile * c,struct brw_reg dst,struct brw_reg src)144 static void copy_colors( struct brw_sf_compile *c,
145 		     struct brw_reg dst,
146 		     struct brw_reg src)
147 {
148    struct brw_compile *p = &c->func;
149    GLuint i;
150 
151    for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
152       if (have_attr(c,i))
153 	 brw_MOV(p,
154 		 get_vert_result(c, dst, i),
155 		 get_vert_result(c, src, i));
156    }
157 }
158 
159 
160 
161 /* Need to use a computed jump to copy flatshaded attributes as the
162  * vertices are ordered according to y-coordinate before reaching this
163  * point, so the PV could be anywhere.
164  */
do_flatshade_triangle(struct brw_sf_compile * c)165 static void do_flatshade_triangle( struct brw_sf_compile *c )
166 {
167    struct brw_compile *p = &c->func;
168    struct intel_context *intel = &p->brw->intel;
169    struct brw_reg ip = brw_ip_reg();
170    GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
171    GLuint jmpi = 1;
172 
173    if (!nr)
174       return;
175 
176    /* Already done in clip program:
177     */
178    if (c->key.primitive == SF_UNFILLED_TRIS)
179       return;
180 
181    if (intel->gen == 5)
182        jmpi = 2;
183 
184    brw_push_insn_state(p);
185 
186    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
187    brw_JMPI(p, ip, ip, c->pv);
188 
189    copy_colors(c, c->vert[1], c->vert[0]);
190    copy_colors(c, c->vert[2], c->vert[0]);
191    brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
192 
193    copy_colors(c, c->vert[0], c->vert[1]);
194    copy_colors(c, c->vert[2], c->vert[1]);
195    brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
196 
197    copy_colors(c, c->vert[0], c->vert[2]);
198    copy_colors(c, c->vert[1], c->vert[2]);
199 
200    brw_pop_insn_state(p);
201 }
202 
203 
do_flatshade_line(struct brw_sf_compile * c)204 static void do_flatshade_line( struct brw_sf_compile *c )
205 {
206    struct brw_compile *p = &c->func;
207    struct intel_context *intel = &p->brw->intel;
208    struct brw_reg ip = brw_ip_reg();
209    GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
210    GLuint jmpi = 1;
211 
212    if (!nr)
213       return;
214 
215    /* Already done in clip program:
216     */
217    if (c->key.primitive == SF_UNFILLED_TRIS)
218       return;
219 
220    if (intel->gen == 5)
221        jmpi = 2;
222 
223    brw_push_insn_state(p);
224 
225    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
226    brw_JMPI(p, ip, ip, c->pv);
227    copy_colors(c, c->vert[1], c->vert[0]);
228 
229    brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
230    copy_colors(c, c->vert[0], c->vert[1]);
231 
232    brw_pop_insn_state(p);
233 }
234 
235 
236 
237 /***********************************************************************
238  * Triangle setup.
239  */
240 
241 
alloc_regs(struct brw_sf_compile * c)242 static void alloc_regs( struct brw_sf_compile *c )
243 {
244    GLuint reg, i;
245 
246    /* Values computed by fixed function unit:
247     */
248    c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
249    c->det = brw_vec1_grf(1, 2);
250    c->dx0 = brw_vec1_grf(1, 3);
251    c->dx2 = brw_vec1_grf(1, 4);
252    c->dy0 = brw_vec1_grf(1, 5);
253    c->dy2 = brw_vec1_grf(1, 6);
254 
255    /* z and 1/w passed in seperately:
256     */
257    c->z[0]     = brw_vec1_grf(2, 0);
258    c->inv_w[0] = brw_vec1_grf(2, 1);
259    c->z[1]     = brw_vec1_grf(2, 2);
260    c->inv_w[1] = brw_vec1_grf(2, 3);
261    c->z[2]     = brw_vec1_grf(2, 4);
262    c->inv_w[2] = brw_vec1_grf(2, 5);
263 
264    /* The vertices:
265     */
266    reg = 3;
267    for (i = 0; i < c->nr_verts; i++) {
268       c->vert[i] = brw_vec8_grf(reg, 0);
269       reg += c->nr_attr_regs;
270    }
271 
272    /* Temporaries, allocated after last vertex reg.
273     */
274    c->inv_det = brw_vec1_grf(reg, 0);  reg++;
275    c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
276    c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
277    c->tmp = brw_vec8_grf(reg, 0);  reg++;
278 
279    /* Note grf allocation:
280     */
281    c->prog_data.total_grf = reg;
282 
283 
284    /* Outputs of this program - interpolation coefficients for
285     * rasterization:
286     */
287    c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
288    c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
289    c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
290 }
291 
292 
copy_z_inv_w(struct brw_sf_compile * c)293 static void copy_z_inv_w( struct brw_sf_compile *c )
294 {
295    struct brw_compile *p = &c->func;
296    GLuint i;
297 
298    brw_push_insn_state(p);
299 
300    /* Copy both scalars with a single MOV:
301     */
302    for (i = 0; i < c->nr_verts; i++)
303       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
304 
305    brw_pop_insn_state(p);
306 }
307 
308 
invert_det(struct brw_sf_compile * c)309 static void invert_det( struct brw_sf_compile *c)
310 {
311    /* Looks like we invert all 8 elements just to get 1/det in
312     * position 2 !?!
313     */
314    brw_math(&c->func,
315 	    c->inv_det,
316 	    BRW_MATH_FUNCTION_INV,
317 	    0,
318 	    c->det,
319 	    BRW_MATH_DATA_SCALAR,
320 	    BRW_MATH_PRECISION_FULL);
321 
322 }
323 
324 
325 static bool
calculate_masks(struct brw_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)326 calculate_masks(struct brw_sf_compile *c,
327 	        GLuint reg,
328 		GLushort *pc,
329 		GLushort *pc_persp,
330 		GLushort *pc_linear)
331 {
332    bool is_last_attr = (reg == c->nr_setup_regs - 1);
333    GLbitfield64 persp_mask;
334    GLbitfield64 linear_mask;
335 
336    if (c->key.do_flat_shading)
337       persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
338                                     BITFIELD64_BIT(VERT_RESULT_COL0) |
339                                     BITFIELD64_BIT(VERT_RESULT_COL1));
340    else
341       persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
342 
343    if (c->key.do_flat_shading)
344       linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
345                                      BITFIELD64_BIT(VERT_RESULT_COL1));
346    else
347       linear_mask = c->key.attrs;
348 
349    *pc_persp = 0;
350    *pc_linear = 0;
351    *pc = 0xf;
352 
353    if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
354       *pc_persp = 0xf;
355 
356    if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
357       *pc_linear = 0xf;
358 
359    /* Maybe only processs one attribute on the final round:
360     */
361    if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
362       *pc |= 0xf0;
363 
364       if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
365 	 *pc_persp |= 0xf0;
366 
367       if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
368 	 *pc_linear |= 0xf0;
369    }
370 
371    return is_last_attr;
372 }
373 
374 /* Calculates the predicate control for which channels of a reg
375  * (containing 2 attrs) to do point sprite coordinate replacement on.
376  */
377 static uint16_t
calculate_point_sprite_mask(struct brw_sf_compile * c,GLuint reg)378 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
379 {
380    int vert_result1, vert_result2;
381    uint16_t pc = 0;
382 
383    vert_result1 = vert_reg_to_vert_result(c, reg, 0);
384    if (vert_result1 >= VERT_RESULT_TEX0 && vert_result1 <= VERT_RESULT_TEX7) {
385       if (c->key.point_sprite_coord_replace & (1 << (vert_result1 - VERT_RESULT_TEX0)))
386 	 pc |= 0x0f;
387    }
388    if (vert_result1 == BRW_VERT_RESULT_PNTC)
389       pc |= 0x0f;
390 
391    vert_result2 = vert_reg_to_vert_result(c, reg, 1);
392    if (vert_result2 >= VERT_RESULT_TEX0 && vert_result2 <= VERT_RESULT_TEX7) {
393       if (c->key.point_sprite_coord_replace & (1 << (vert_result2 -
394                                                      VERT_RESULT_TEX0)))
395          pc |= 0xf0;
396    }
397    if (vert_result2 == BRW_VERT_RESULT_PNTC)
398       pc |= 0xf0;
399 
400    return pc;
401 }
402 
403 
404 
brw_emit_tri_setup(struct brw_sf_compile * c,bool allocate)405 void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
406 {
407    struct brw_compile *p = &c->func;
408    GLuint i;
409 
410    c->nr_verts = 3;
411 
412    if (allocate)
413       alloc_regs(c);
414 
415    invert_det(c);
416    copy_z_inv_w(c);
417 
418    if (c->key.do_twoside_color)
419       do_twoside_color(c);
420 
421    if (c->key.do_flat_shading)
422       do_flatshade_triangle(c);
423 
424 
425    for (i = 0; i < c->nr_setup_regs; i++)
426    {
427       /* Pair of incoming attributes:
428        */
429       struct brw_reg a0 = offset(c->vert[0], i);
430       struct brw_reg a1 = offset(c->vert[1], i);
431       struct brw_reg a2 = offset(c->vert[2], i);
432       GLushort pc, pc_persp, pc_linear;
433       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
434 
435       if (pc_persp)
436       {
437 	 brw_set_predicate_control_flag_value(p, pc_persp);
438 	 brw_MUL(p, a0, a0, c->inv_w[0]);
439 	 brw_MUL(p, a1, a1, c->inv_w[1]);
440 	 brw_MUL(p, a2, a2, c->inv_w[2]);
441       }
442 
443 
444       /* Calculate coefficients for interpolated values:
445        */
446       if (pc_linear)
447       {
448 	 brw_set_predicate_control_flag_value(p, pc_linear);
449 
450 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
451 	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
452 
453 	 /* calculate dA/dx
454 	  */
455 	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
456 	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
457 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
458 
459 	 /* calculate dA/dy
460 	  */
461 	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
462 	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
463 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
464       }
465 
466       {
467 	 brw_set_predicate_control_flag_value(p, pc);
468 	 /* start point for interpolation
469 	  */
470 	 brw_MOV(p, c->m3C0, a0);
471 
472 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
473 	  * the send instruction:
474 	  */
475 	 brw_urb_WRITE(p,
476 		       brw_null_reg(),
477 		       0,
478 		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
479 		       0, 	/* allocate */
480 		       1,	/* used */
481 		       4, 	/* msg len */
482 		       0,	/* response len */
483 		       last,	/* eot */
484 		       last, 	/* writes complete */
485 		       i*4,	/* offset */
486 		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
487       }
488    }
489 }
490 
491 
492 
brw_emit_line_setup(struct brw_sf_compile * c,bool allocate)493 void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
494 {
495    struct brw_compile *p = &c->func;
496    GLuint i;
497 
498 
499    c->nr_verts = 2;
500 
501    if (allocate)
502       alloc_regs(c);
503 
504    invert_det(c);
505    copy_z_inv_w(c);
506 
507    if (c->key.do_flat_shading)
508       do_flatshade_line(c);
509 
510    for (i = 0; i < c->nr_setup_regs; i++)
511    {
512       /* Pair of incoming attributes:
513        */
514       struct brw_reg a0 = offset(c->vert[0], i);
515       struct brw_reg a1 = offset(c->vert[1], i);
516       GLushort pc, pc_persp, pc_linear;
517       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
518 
519       if (pc_persp)
520       {
521 	 brw_set_predicate_control_flag_value(p, pc_persp);
522 	 brw_MUL(p, a0, a0, c->inv_w[0]);
523 	 brw_MUL(p, a1, a1, c->inv_w[1]);
524       }
525 
526       /* Calculate coefficients for position, color:
527        */
528       if (pc_linear) {
529 	 brw_set_predicate_control_flag_value(p, pc_linear);
530 
531 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
532 
533  	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
534 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
535 
536 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
537 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
538       }
539 
540       {
541 	 brw_set_predicate_control_flag_value(p, pc);
542 
543 	 /* start point for interpolation
544 	  */
545 	 brw_MOV(p, c->m3C0, a0);
546 
547 	 /* Copy m0..m3 to URB.
548 	  */
549 	 brw_urb_WRITE(p,
550 		       brw_null_reg(),
551 		       0,
552 		       brw_vec8_grf(0, 0),
553 		       0, 	/* allocate */
554 		       1, 	/* used */
555 		       4, 	/* msg len */
556 		       0,	/* response len */
557 		       last, 	/* eot */
558 		       last, 	/* writes complete */
559 		       i*4,	/* urb destination offset */
560 		       BRW_URB_SWIZZLE_TRANSPOSE);
561       }
562    }
563 }
564 
brw_emit_point_sprite_setup(struct brw_sf_compile * c,bool allocate)565 void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
566 {
567    struct brw_compile *p = &c->func;
568    GLuint i;
569 
570    c->nr_verts = 1;
571 
572    if (allocate)
573       alloc_regs(c);
574 
575    copy_z_inv_w(c);
576    for (i = 0; i < c->nr_setup_regs; i++)
577    {
578       struct brw_reg a0 = offset(c->vert[0], i);
579       GLushort pc, pc_persp, pc_linear, pc_coord_replace;
580       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
581 
582       pc_coord_replace = calculate_point_sprite_mask(c, i);
583       pc_persp &= ~pc_coord_replace;
584 
585       if (pc_persp) {
586 	 brw_set_predicate_control_flag_value(p, pc_persp);
587 	 brw_MUL(p, a0, a0, c->inv_w[0]);
588       }
589 
590       /* Point sprite coordinate replacement: A texcoord with this
591        * enabled gets replaced with the value (x, y, 0, 1) where x and
592        * y vary from 0 to 1 across the horizontal and vertical of the
593        * point.
594        */
595       if (pc_coord_replace) {
596 	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
597 	 /* Caculate 1.0/PointWidth */
598 	 brw_math(&c->func,
599 		  c->tmp,
600 		  BRW_MATH_FUNCTION_INV,
601 		  0,
602 		  c->dx0,
603 		  BRW_MATH_DATA_SCALAR,
604 		  BRW_MATH_PRECISION_FULL);
605 
606 	 brw_set_access_mode(p, BRW_ALIGN_16);
607 
608 	 /* dA/dx, dA/dy */
609 	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
610 	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
611 	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
612 	 if (c->key.sprite_origin_lower_left) {
613 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
614 	 } else {
615 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
616 	 }
617 
618 	 /* attribute constant offset */
619 	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
620 	 if (c->key.sprite_origin_lower_left) {
621 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
622 	 } else {
623 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
624 	 }
625 
626 	 brw_set_access_mode(p, BRW_ALIGN_1);
627       }
628 
629       if (pc & ~pc_coord_replace) {
630 	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
631 	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
632 	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
633 	 brw_MOV(p, c->m3C0, a0); /* constant value */
634       }
635 
636 
637       brw_set_predicate_control_flag_value(p, pc);
638       /* Copy m0..m3 to URB. */
639       brw_urb_WRITE(p,
640 		    brw_null_reg(),
641 		    0,
642 		    brw_vec8_grf(0, 0),
643 		    0, 	/* allocate */
644 		    1,	/* used */
645 		    4, 	/* msg len */
646 		    0,	/* response len */
647 		    last, 	/* eot */
648 		    last, 	/* writes complete */
649 		    i*4,	/* urb destination offset */
650 		    BRW_URB_SWIZZLE_TRANSPOSE);
651    }
652 }
653 
654 /* Points setup - several simplifications as all attributes are
655  * constant across the face of the point (point sprites excluded!)
656  */
brw_emit_point_setup(struct brw_sf_compile * c,bool allocate)657 void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
658 {
659    struct brw_compile *p = &c->func;
660    GLuint i;
661 
662    c->nr_verts = 1;
663 
664    if (allocate)
665       alloc_regs(c);
666 
667    copy_z_inv_w(c);
668 
669    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
670    brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
671 
672    for (i = 0; i < c->nr_setup_regs; i++)
673    {
674       struct brw_reg a0 = offset(c->vert[0], i);
675       GLushort pc, pc_persp, pc_linear;
676       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
677 
678       if (pc_persp)
679       {
680 	 /* This seems odd as the values are all constant, but the
681 	  * fragment shader will be expecting it:
682 	  */
683 	 brw_set_predicate_control_flag_value(p, pc_persp);
684 	 brw_MUL(p, a0, a0, c->inv_w[0]);
685       }
686 
687 
688       /* The delta values are always zero, just send the starting
689        * coordinate.  Again, this is to fit in with the interpolation
690        * code in the fragment shader.
691        */
692       {
693 	 brw_set_predicate_control_flag_value(p, pc);
694 
695 	 brw_MOV(p, c->m3C0, a0); /* constant value */
696 
697 	 /* Copy m0..m3 to URB.
698 	  */
699 	 brw_urb_WRITE(p,
700 		       brw_null_reg(),
701 		       0,
702 		       brw_vec8_grf(0, 0),
703 		       0, 	/* allocate */
704 		       1,	/* used */
705 		       4, 	/* msg len */
706 		       0,	/* response len */
707 		       last, 	/* eot */
708 		       last, 	/* writes complete */
709 		       i*4,	/* urb destination offset */
710 		       BRW_URB_SWIZZLE_TRANSPOSE);
711       }
712    }
713 }
714 
brw_emit_anyprim_setup(struct brw_sf_compile * c)715 void brw_emit_anyprim_setup( struct brw_sf_compile *c )
716 {
717    struct brw_compile *p = &c->func;
718    struct brw_reg ip = brw_ip_reg();
719    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
720    struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
721    struct brw_reg primmask;
722    int jmp;
723    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
724 
725    GLuint saveflag;
726 
727    c->nr_verts = 3;
728    alloc_regs(c);
729 
730    primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
731 
732    brw_MOV(p, primmask, brw_imm_ud(1));
733    brw_SHL(p, primmask, primmask, payload_prim);
734 
735    brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
736    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
737 					       (1<<_3DPRIM_TRISTRIP) |
738 					       (1<<_3DPRIM_TRIFAN) |
739 					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
740 					       (1<<_3DPRIM_POLYGON) |
741 					       (1<<_3DPRIM_RECTLIST) |
742 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
743    jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
744    {
745       saveflag = p->flag_value;
746       brw_push_insn_state(p);
747       brw_emit_tri_setup( c, false );
748       brw_pop_insn_state(p);
749       p->flag_value = saveflag;
750       /* note - thread killed in subroutine, so must
751        * restore the flag which is changed when building
752        * the subroutine. fix #13240
753        */
754    }
755    brw_land_fwd_jump(p, jmp);
756 
757    brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
758    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
759 					       (1<<_3DPRIM_LINESTRIP) |
760 					       (1<<_3DPRIM_LINELOOP) |
761 					       (1<<_3DPRIM_LINESTRIP_CONT) |
762 					       (1<<_3DPRIM_LINESTRIP_BF) |
763 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
764    jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
765    {
766       saveflag = p->flag_value;
767       brw_push_insn_state(p);
768       brw_emit_line_setup( c, false );
769       brw_pop_insn_state(p);
770       p->flag_value = saveflag;
771       /* note - thread killed in subroutine */
772    }
773    brw_land_fwd_jump(p, jmp);
774 
775    brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
776    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
777    jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
778    {
779       saveflag = p->flag_value;
780       brw_push_insn_state(p);
781       brw_emit_point_sprite_setup( c, false );
782       brw_pop_insn_state(p);
783       p->flag_value = saveflag;
784    }
785    brw_land_fwd_jump(p, jmp);
786 
787    brw_emit_point_setup( c, false );
788 }
789 
790 
791 
792 
793