1 /**************************************************************************
2  *
3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /*
29  * Binning code for triangles
30  */
31 
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "util/u_rect.h"
35 #include "util/u_sse.h"
36 #include "lp_perf.h"
37 #include "lp_setup_context.h"
38 #include "lp_rast.h"
39 #include "lp_state_fs.h"
40 #include "lp_state_setup.h"
41 
42 #define NUM_CHANNELS 4
43 
44 #if defined(PIPE_ARCH_SSE)
45 #include <emmintrin.h>
46 #endif
47 
48 static INLINE int
subpixel_snap(float a)49 subpixel_snap(float a)
50 {
51    return util_iround(FIXED_ONE * a);
52 }
53 
54 static INLINE float
fixed_to_float(int a)55 fixed_to_float(int a)
56 {
57    return a * (1.0 / FIXED_ONE);
58 }
59 
60 
/**
 * Triangle vertex positions, edge deltas and area, all in fixed point
 * coordinates.
 *
 * x[] and y[] carry a fourth, zeroed element so that each array can be
 * loaded as one 4 x 32-bit vector by the SSE plane setup.
 */
struct fixed_position {
   int x[4];   /* vertex x coordinates; x[3] is padding */
   int y[4];   /* vertex y coordinates; y[3] is padding */
   int area;   /* dx01 * dy20 - dx20 * dy01 */
   int dx01;   /* x[0] - x[1] */
   int dy01;   /* y[0] - y[1] */
   int dx20;   /* x[2] - x[0] */
   int dy20;   /* y[2] - y[0] */
};
71 
72 
73 /**
74  * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
75  * immediately after it.
76  * The memory is allocated from the per-scene pool, not per-tile.
77  * \param tri_size  returns number of bytes allocated
78  * \param num_inputs  number of fragment shader inputs
79  * \return pointer to triangle space
80  */
81 struct lp_rast_triangle *
lp_setup_alloc_triangle(struct lp_scene * scene,unsigned nr_inputs,unsigned nr_planes,unsigned * tri_size)82 lp_setup_alloc_triangle(struct lp_scene *scene,
83                         unsigned nr_inputs,
84                         unsigned nr_planes,
85                         unsigned *tri_size)
86 {
87    unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
88    unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
89    struct lp_rast_triangle *tri;
90 
91    *tri_size = (sizeof(struct lp_rast_triangle) +
92                 3 * input_array_sz +
93                 plane_sz);
94 
95    tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
96    if (tri == NULL)
97       return NULL;
98 
99    tri->inputs.stride = input_array_sz;
100 
101    {
102       char *a = (char *)tri;
103       char *b = (char *)&GET_PLANES(tri)[nr_planes];
104       assert(b - a == *tri_size);
105    }
106 
107    return tri;
108 }
109 
110 void
lp_setup_print_vertex(struct lp_setup_context * setup,const char * name,const float (* v)[4])111 lp_setup_print_vertex(struct lp_setup_context *setup,
112                       const char *name,
113                       const float (*v)[4])
114 {
115    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
116    int i, j;
117 
118    debug_printf("   wpos (%s[0]) xyzw %f %f %f %f\n",
119                 name,
120                 v[0][0], v[0][1], v[0][2], v[0][3]);
121 
122    for (i = 0; i < key->num_inputs; i++) {
123       const float *in = v[key->inputs[i].src_index];
124 
125       debug_printf("  in[%d] (%s[%d]) %s%s%s%s ",
126                    i,
127                    name, key->inputs[i].src_index,
128                    (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
129                    (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
130                    (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
131                    (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
132 
133       for (j = 0; j < 4; j++)
134          if (key->inputs[i].usage_mask & (1<<j))
135             debug_printf("%.5f ", in[j]);
136 
137       debug_printf("\n");
138    }
139 }
140 
141 
/**
 * Print a triangle (for debug): its winding, derived from the sign of
 * the 2D cross product of two edges, followed by all three vertices.
 */
void
lp_setup_print_triangle(struct lp_setup_context *setup,
                        const float (*v0)[4],
                        const float (*v1)[4],
                        const float (*v2)[4])
{
   /* Edges from v2 to v0 and from v2 to v1 */
   const float e_x = v0[0][0] - v2[0][0];
   const float e_y = v0[0][1] - v2[0][1];
   const float f_x = v1[0][0] - v2[0][0];
   const float f_y = v1[0][1] - v2[0][1];

   /* z component of cross(e, f) */
   const float det = e_x * f_y - e_y * f_x;

   debug_printf("triangle\n");

   if (det > 0.0f)
      debug_printf("   - cw\n");
   else if (det < 0.0f)
      debug_printf("   - ccw\n");
   else
      debug_printf("   - zero area\n");

   lp_setup_print_vertex(setup, "v0", v0);
   lp_setup_print_vertex(setup, "v1", v1);
   lp_setup_print_vertex(setup, "v2", v2);
}
173 
174 
175 #define MAX_PLANES 8
176 static unsigned
177 lp_rast_tri_tab[MAX_PLANES+1] = {
178    0,               /* should be impossible */
179    LP_RAST_OP_TRIANGLE_1,
180    LP_RAST_OP_TRIANGLE_2,
181    LP_RAST_OP_TRIANGLE_3,
182    LP_RAST_OP_TRIANGLE_4,
183    LP_RAST_OP_TRIANGLE_5,
184    LP_RAST_OP_TRIANGLE_6,
185    LP_RAST_OP_TRIANGLE_7,
186    LP_RAST_OP_TRIANGLE_8
187 };
188 
189 
190 
191 /**
192  * The primitive covers the whole tile- shade whole tile.
193  *
194  * \param tx, ty  the tile position in tiles, not pixels
195  */
196 static boolean
lp_setup_whole_tile(struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs,int tx,int ty)197 lp_setup_whole_tile(struct lp_setup_context *setup,
198                     const struct lp_rast_shader_inputs *inputs,
199                     int tx, int ty)
200 {
201    struct lp_scene *scene = setup->scene;
202 
203    LP_COUNT(nr_fully_covered_64);
204 
205    /* if variant is opaque and scissor doesn't effect the tile */
206    if (inputs->opaque) {
207       if (!scene->fb.zsbuf) {
208          /*
209           * All previous rendering will be overwritten so reset the bin.
210           */
211          lp_scene_bin_reset( scene, tx, ty );
212       }
213 
214       LP_COUNT(nr_shade_opaque_64);
215       return lp_scene_bin_cmd_with_state( scene, tx, ty,
216                                           setup->fs.stored,
217                                           LP_RAST_OP_SHADE_TILE_OPAQUE,
218                                           lp_rast_arg_inputs(inputs) );
219    } else {
220       LP_COUNT(nr_shade_64);
221       return lp_scene_bin_cmd_with_state( scene, tx, ty,
222                                           setup->fs.stored,
223                                           LP_RAST_OP_SHADE_TILE,
224                                           lp_rast_arg_inputs(inputs) );
225    }
226 }
227 
228 
229 /**
230  * Do basic setup for triangle rasterization and determine which
231  * framebuffer tiles are touched.  Put the triangle in the scene's
232  * bins for the tiles which we overlap.
233  */
234 static boolean
do_triangle_ccw(struct lp_setup_context * setup,struct fixed_position * position,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],boolean frontfacing)235 do_triangle_ccw(struct lp_setup_context *setup,
236                 struct fixed_position* position,
237                 const float (*v0)[4],
238                 const float (*v1)[4],
239                 const float (*v2)[4],
240                 boolean frontfacing )
241 {
242    struct lp_scene *scene = setup->scene;
243    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
244    struct lp_rast_triangle *tri;
245    struct lp_rast_plane *plane;
246    struct u_rect bbox;
247    unsigned tri_bytes;
248    int nr_planes = 3;
249 
250    /* Area should always be positive here */
251    assert(position->area > 0);
252 
253    if (0)
254       lp_setup_print_triangle(setup, v0, v1, v2);
255 
256    if (setup->scissor_test) {
257       nr_planes = 7;
258    }
259    else {
260       nr_planes = 3;
261    }
262 
263    /* Bounding rectangle (in pixels) */
264    {
265       /* Yes this is necessary to accurately calculate bounding boxes
266        * with the two fill-conventions we support.  GL (normally) ends
267        * up needing a bottom-left fill convention, which requires
268        * slightly different rounding.
269        */
270       int adj = (setup->pixel_offset != 0) ? 1 : 0;
271 
272       /* Inclusive x0, exclusive x1 */
273       bbox.x0 =  MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
274       bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;
275 
276       /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
277       bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
278       bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
279    }
280 
281    if (bbox.x1 < bbox.x0 ||
282        bbox.y1 < bbox.y0) {
283       if (0) debug_printf("empty bounding box\n");
284       LP_COUNT(nr_culled_tris);
285       return TRUE;
286    }
287 
288    if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
289       if (0) debug_printf("offscreen\n");
290       LP_COUNT(nr_culled_tris);
291       return TRUE;
292    }
293 
294    /* Can safely discard negative regions, but need to keep hold of
295     * information about when the triangle extends past screen
296     * boundaries.  See trimmed_box in lp_setup_bin_triangle().
297     */
298    bbox.x0 = MAX2(bbox.x0, 0);
299    bbox.y0 = MAX2(bbox.y0, 0);
300 
301    tri = lp_setup_alloc_triangle(scene,
302                                  key->num_inputs,
303                                  nr_planes,
304                                  &tri_bytes);
305    if (!tri)
306       return FALSE;
307 
308 #if 0
309    tri->v[0][0] = v0[0][0];
310    tri->v[1][0] = v1[0][0];
311    tri->v[2][0] = v2[0][0];
312    tri->v[0][1] = v0[0][1];
313    tri->v[1][1] = v1[0][1];
314    tri->v[2][1] = v2[0][1];
315 #endif
316 
317    LP_COUNT(nr_tris);
318 
319    /* Setup parameter interpolants:
320     */
321    setup->setup.variant->jit_function( v0,
322 				       v1,
323 				       v2,
324 				       frontfacing,
325 				       GET_A0(&tri->inputs),
326 				       GET_DADX(&tri->inputs),
327 				       GET_DADY(&tri->inputs) );
328 
329    tri->inputs.frontfacing = frontfacing;
330    tri->inputs.disable = FALSE;
331    tri->inputs.opaque = setup->fs.current.variant->opaque;
332 
333    if (0)
334       lp_dump_setup_coef(&setup->setup.variant->key,
335 			 (const float (*)[4])GET_A0(&tri->inputs),
336 			 (const float (*)[4])GET_DADX(&tri->inputs),
337 			 (const float (*)[4])GET_DADY(&tri->inputs));
338 
339    plane = GET_PLANES(tri);
340 
341 #if defined(PIPE_ARCH_SSE)
342    {
343       __m128i vertx, verty;
344       __m128i shufx, shufy;
345       __m128i dcdx, dcdy, c;
346       __m128i unused;
347       __m128i dcdx_neg_mask;
348       __m128i dcdy_neg_mask;
349       __m128i dcdx_zero_mask;
350       __m128i top_left_flag;
351       __m128i c_inc_mask, c_inc;
352       __m128i eo, p0, p1, p2;
353       __m128i zero = _mm_setzero_si128();
354 
355       vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
356       verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
357 
358       shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
359       shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
360 
361       dcdx = _mm_sub_epi32(verty, shufy);
362       dcdy = _mm_sub_epi32(vertx, shufx);
363 
364       dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
365       dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
366       dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
367 
368       top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0);
369 
370       c_inc_mask = _mm_or_si128(dcdx_neg_mask,
371                                 _mm_and_si128(dcdx_zero_mask,
372                                               _mm_xor_si128(dcdy_neg_mask,
373                                                             top_left_flag)));
374 
375       c_inc = _mm_srli_epi32(c_inc_mask, 31);
376 
377       c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
378                         mm_mullo_epi32(dcdy, verty));
379 
380       c = _mm_add_epi32(c, c_inc);
381 
382       /* Scale up to match c:
383        */
384       dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
385       dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
386 
387       /* Calculate trivial reject values:
388        */
389       eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
390                          _mm_and_si128(dcdx_neg_mask, dcdx));
391 
392       /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
393 
394       /* Pointless transpose which gets undone immediately in
395        * rasterization:
396        */
397       transpose4_epi32(&c, &dcdx, &dcdy, &eo,
398                        &p0, &p1, &p2, &unused);
399 
400       _mm_store_si128((__m128i *)&plane[0], p0);
401       _mm_store_si128((__m128i *)&plane[1], p1);
402       _mm_store_si128((__m128i *)&plane[2], p2);
403    }
404 #else
405    {
406       int i;
407       plane[0].dcdy = position->dx01;
408       plane[1].dcdy = position->x[1] - position->x[2];
409       plane[2].dcdy = position->dx20;
410       plane[0].dcdx = position->dy01;
411       plane[1].dcdx = position->y[1] - position->y[2];
412       plane[2].dcdx = position->dy20;
413 
414       for (i = 0; i < 3; i++) {
415          /* half-edge constants, will be interated over the whole render
416           * target.
417           */
418          plane[i].c = plane[i].dcdx * position->x[i] - plane[i].dcdy * position->y[i];
419 
420          /* correct for top-left vs. bottom-left fill convention.
421           *
422           * note that we're overloading gl_rasterization_rules to mean
423           * both (0.5,0.5) pixel centers *and* bottom-left filling
424           * convention.
425           *
426           * GL actually has a top-left filling convention, but GL's
427           * notion of "top" differs from gallium's...
428           *
429           * Also, sometimes (in FBO cases) GL will render upside down
430           * to its usual method, in which case it will probably want
431           * to use the opposite, top-left convention.
432           */
433          if (plane[i].dcdx < 0) {
434             /* both fill conventions want this - adjust for left edges */
435             plane[i].c++;
436          }
437          else if (plane[i].dcdx == 0) {
438             if (setup->pixel_offset == 0) {
439                /* correct for top-left fill convention:
440                 */
441                if (plane[i].dcdy > 0) plane[i].c++;
442             }
443             else {
444                /* correct for bottom-left fill convention:
445                 */
446                if (plane[i].dcdy < 0) plane[i].c++;
447             }
448          }
449 
450          plane[i].dcdx *= FIXED_ONE;
451          plane[i].dcdy *= FIXED_ONE;
452 
453          /* find trivial reject offsets for each edge for a single-pixel
454           * sized block.  These will be scaled up at each recursive level to
455           * match the active blocksize.  Scaling in this way works best if
456           * the blocks are square.
457           */
458          plane[i].eo = 0;
459          if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
460          if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
461       }
462    }
463 #endif
464 
465    if (0) {
466       debug_printf("p0: %08x/%08x/%08x/%08x\n",
467                    plane[0].c,
468                    plane[0].dcdx,
469                    plane[0].dcdy,
470                    plane[0].eo);
471 
472       debug_printf("p1: %08x/%08x/%08x/%08x\n",
473                    plane[1].c,
474                    plane[1].dcdx,
475                    plane[1].dcdy,
476                    plane[1].eo);
477 
478       debug_printf("p0: %08x/%08x/%08x/%08x\n",
479                    plane[2].c,
480                    plane[2].dcdx,
481                    plane[2].dcdy,
482                    plane[2].eo);
483    }
484 
485 
486    /*
487     * When rasterizing scissored tris, use the intersection of the
488     * triangle bounding box and the scissor rect to generate the
489     * scissor planes.
490     *
491     * This permits us to cut off the triangle "tails" that are present
492     * in the intermediate recursive levels caused when two of the
493     * triangles edges don't diverge quickly enough to trivially reject
494     * exterior blocks from the triangle.
495     *
496     * It's not really clear if it's worth worrying about these tails,
497     * but since we generate the planes for each scissored tri, it's
498     * free to trim them in this case.
499     *
500     * Note that otherwise, the scissor planes only vary in 'C' value,
501     * and even then only on state-changes.  Could alternatively store
502     * these planes elsewhere.
503     */
504    if (nr_planes == 7) {
505       const struct u_rect *scissor = &setup->scissor;
506 
507       plane[3].dcdx = -1;
508       plane[3].dcdy = 0;
509       plane[3].c = 1-scissor->x0;
510       plane[3].eo = 1;
511 
512       plane[4].dcdx = 1;
513       plane[4].dcdy = 0;
514       plane[4].c = scissor->x1+1;
515       plane[4].eo = 0;
516 
517       plane[5].dcdx = 0;
518       plane[5].dcdy = 1;
519       plane[5].c = 1-scissor->y0;
520       plane[5].eo = 1;
521 
522       plane[6].dcdx = 0;
523       plane[6].dcdy = -1;
524       plane[6].c = scissor->y1+1;
525       plane[6].eo = 0;
526    }
527 
528    return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
529 }
530 
/**
 * Round the input down to the nearest power of two that is less than
 * or equal to it.
 *
 * \param n  value to round down
 * \return the largest power of two <= n, or 0 when n is 0
 */
static INLINE uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   uint32_t msb;

   /* bsr leaves its destination undefined for a zero source */
   if (n == 0)
      return 0;

   __asm__("bsr %1,%0"
          : "=r" (msb)
          : "rm" (n));

   /* Shift an unsigned one: msb can be 31, which would overflow a
    * signed int.
    */
   return (uint32_t)1 << msb;
#else
   /* Smear the highest set bit into every lower bit, then keep only
    * the top one.
    */
   n |= (n >>  1);
   n |= (n >>  2);
   n |= (n >>  4);
   n |= (n >>  8);
   n |= (n >> 16);
   return n - (n >> 1);
#endif
}
556 
557 
558 boolean
lp_setup_bin_triangle(struct lp_setup_context * setup,struct lp_rast_triangle * tri,const struct u_rect * bbox,int nr_planes)559 lp_setup_bin_triangle( struct lp_setup_context *setup,
560                        struct lp_rast_triangle *tri,
561                        const struct u_rect *bbox,
562                        int nr_planes )
563 {
564    struct lp_scene *scene = setup->scene;
565    struct u_rect trimmed_box = *bbox;
566    int i;
567 
568    /* What is the largest power-of-two boundary this triangle crosses:
569     */
570    int dx = floor_pot((bbox->x0 ^ bbox->x1) |
571 		      (bbox->y0 ^ bbox->y1));
572 
573    /* The largest dimension of the rasterized area of the triangle
574     * (aligned to a 4x4 grid), rounded down to the nearest power of two:
575     */
576    int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
577 		      (bbox->y1 - (bbox->y0 & ~3)));
578 
579    /* Now apply scissor, etc to the bounding box.  Could do this
580     * earlier, but it confuses the logic for tri-16 and would force
581     * the rasterizer to also respect scissor, etc, just for the rare
582     * cases where a small triangle extends beyond the scissor.
583     */
584    u_rect_find_intersection(&setup->draw_region, &trimmed_box);
585 
586    /* Determine which tile(s) intersect the triangle's bounding box
587     */
588    if (dx < TILE_SIZE)
589    {
590       int ix0 = bbox->x0 / TILE_SIZE;
591       int iy0 = bbox->y0 / TILE_SIZE;
592       unsigned px = bbox->x0 & 63 & ~3;
593       unsigned py = bbox->y0 & 63 & ~3;
594 
595       assert(iy0 == bbox->y1 / TILE_SIZE &&
596 	     ix0 == bbox->x1 / TILE_SIZE);
597 
598       if (nr_planes == 3) {
599          if (sz < 4)
600          {
601             /* Triangle is contained in a single 4x4 stamp:
602              */
603             assert(px + 4 <= TILE_SIZE);
604             assert(py + 4 <= TILE_SIZE);
605             return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
606                                                 setup->fs.stored,
607                                                 LP_RAST_OP_TRIANGLE_3_4,
608                                                 lp_rast_arg_triangle_contained(tri, px, py) );
609          }
610 
611          if (sz < 16)
612          {
613             /* Triangle is contained in a single 16x16 block:
614              */
615 
616             /*
617              * The 16x16 block is only 4x4 aligned, and can exceed the tile
618              * dimensions if the triangle is 16 pixels in one dimension but 4
619              * in the other. So budge the 16x16 back inside the tile.
620              */
621             px = MIN2(px, TILE_SIZE - 16);
622             py = MIN2(py, TILE_SIZE - 16);
623 
624             assert(px + 16 <= TILE_SIZE);
625             assert(py + 16 <= TILE_SIZE);
626 
627             return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
628                                                 setup->fs.stored,
629                                                 LP_RAST_OP_TRIANGLE_3_16,
630                                                 lp_rast_arg_triangle_contained(tri, px, py) );
631          }
632       }
633       else if (nr_planes == 4 && sz < 16)
634       {
635          px = MIN2(px, TILE_SIZE - 16);
636          py = MIN2(py, TILE_SIZE - 16);
637 
638          assert(px + 16 <= TILE_SIZE);
639          assert(py + 16 <= TILE_SIZE);
640 
641          return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
642                                             setup->fs.stored,
643                                             LP_RAST_OP_TRIANGLE_4_16,
644                                             lp_rast_arg_triangle_contained(tri, px, py));
645       }
646 
647 
648       /* Triangle is contained in a single tile:
649        */
650       return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored,
651                                           lp_rast_tri_tab[nr_planes],
652                                           lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
653    }
654    else
655    {
656       struct lp_rast_plane *plane = GET_PLANES(tri);
657       int c[MAX_PLANES];
658       int ei[MAX_PLANES];
659 
660       int eo[MAX_PLANES];
661       int xstep[MAX_PLANES];
662       int ystep[MAX_PLANES];
663       int x, y;
664 
665       int ix0 = trimmed_box.x0 / TILE_SIZE;
666       int iy0 = trimmed_box.y0 / TILE_SIZE;
667       int ix1 = trimmed_box.x1 / TILE_SIZE;
668       int iy1 = trimmed_box.y1 / TILE_SIZE;
669 
670       for (i = 0; i < nr_planes; i++) {
671          c[i] = (plane[i].c +
672                  plane[i].dcdy * iy0 * TILE_SIZE -
673                  plane[i].dcdx * ix0 * TILE_SIZE);
674 
675          ei[i] = (plane[i].dcdy -
676                   plane[i].dcdx -
677                   plane[i].eo) << TILE_ORDER;
678 
679          eo[i] = plane[i].eo << TILE_ORDER;
680          xstep[i] = -(plane[i].dcdx << TILE_ORDER);
681          ystep[i] = plane[i].dcdy << TILE_ORDER;
682       }
683 
684 
685 
686       /* Test tile-sized blocks against the triangle.
687        * Discard blocks fully outside the tri.  If the block is fully
688        * contained inside the tri, bin an lp_rast_shade_tile command.
689        * Else, bin a lp_rast_triangle command.
690        */
691       for (y = iy0; y <= iy1; y++)
692       {
693 	 boolean in = FALSE;  /* are we inside the triangle? */
694 	 int cx[MAX_PLANES];
695 
696          for (i = 0; i < nr_planes; i++)
697             cx[i] = c[i];
698 
699 	 for (x = ix0; x <= ix1; x++)
700 	 {
701             int out = 0;
702             int partial = 0;
703 
704             for (i = 0; i < nr_planes; i++) {
705                int planeout = cx[i] + eo[i];
706                int planepartial = cx[i] + ei[i] - 1;
707                out |= (planeout >> 31);
708                partial |= (planepartial >> 31) & (1<<i);
709             }
710 
711             if (out) {
712                /* do nothing */
713                if (in)
714                   break;  /* exiting triangle, all done with this row */
715                LP_COUNT(nr_empty_64);
716             }
717             else if (partial) {
718                /* Not trivially accepted by at least one plane -
719                 * rasterize/shade partial tile
720                 */
721                int count = util_bitcount(partial);
722                in = TRUE;
723 
724                if (!lp_scene_bin_cmd_with_state( scene, x, y,
725                                                  setup->fs.stored,
726                                                  lp_rast_tri_tab[count],
727                                                  lp_rast_arg_triangle(tri, partial) ))
728                   goto fail;
729 
730                LP_COUNT(nr_partially_covered_64);
731             }
732             else {
733                /* triangle covers the whole tile- shade whole tile */
734                LP_COUNT(nr_fully_covered_64);
735                in = TRUE;
736                if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
737                   goto fail;
738             }
739 
740 	    /* Iterate cx values across the region:
741 	     */
742             for (i = 0; i < nr_planes; i++)
743                cx[i] += xstep[i];
744 	 }
745 
746 	 /* Iterate c values down the region:
747 	  */
748          for (i = 0; i < nr_planes; i++)
749             c[i] += ystep[i];
750       }
751    }
752 
753    return TRUE;
754 
755 fail:
756    /* Need to disable any partially binned triangle.  This is easier
757     * than trying to locate all the triangle, shade-tile, etc,
758     * commands which may have been binned.
759     */
760    tri->inputs.disable = TRUE;
761    return FALSE;
762 }
763 
764 
765 /**
766  * Try to draw the triangle, restart the scene on failure.
767  */
retry_triangle_ccw(struct lp_setup_context * setup,struct fixed_position * position,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],boolean front)768 static void retry_triangle_ccw( struct lp_setup_context *setup,
769                                 struct fixed_position* position,
770                                 const float (*v0)[4],
771                                 const float (*v1)[4],
772                                 const float (*v2)[4],
773                                 boolean front)
774 {
775    if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
776    {
777       if (!lp_setup_flush_and_restart(setup))
778          return;
779 
780       if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
781          return;
782    }
783 }
784 
785 
786 /**
787  * Calculate fixed position data for a triangle
788  */
789 static INLINE void
calc_fixed_position(struct lp_setup_context * setup,struct fixed_position * position,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4])790 calc_fixed_position( struct lp_setup_context *setup,
791                      struct fixed_position* position,
792                      const float (*v0)[4],
793                      const float (*v1)[4],
794                      const float (*v2)[4])
795 {
796    position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
797    position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
798    position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
799    position->x[3] = 0;
800 
801    position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
802    position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
803    position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
804    position->y[3] = 0;
805 
806    position->dx01 = position->x[0] - position->x[1];
807    position->dy01 = position->y[0] - position->y[1];
808 
809    position->dx20 = position->x[2] - position->x[0];
810    position->dy20 = position->y[2] - position->y[0];
811 
812    position->area = position->dx01 * position->dy20 - position->dx20 * position->dy01;
813 }
814 
815 
816 /**
817  * Rotate a triangle, flipping its clockwise direction,
818  * Swaps values for xy[0] and xy[1]
819  */
820 static INLINE void
rotate_fixed_position_01(struct fixed_position * position)821 rotate_fixed_position_01( struct fixed_position* position )
822 {
823    int x, y;
824 
825    x = position->x[1];
826    y = position->y[1];
827    position->x[1] = position->x[0];
828    position->y[1] = position->y[0];
829    position->x[0] = x;
830    position->y[0] = y;
831 
832    position->dx01 = -position->dx01;
833    position->dy01 = -position->dy01;
834    position->dx20 = position->x[2] - position->x[0];
835    position->dy20 = position->y[2] - position->y[0];
836 
837    position->area = -position->area;
838 }
839 
840 
841 /**
842  * Rotate a triangle, flipping its clockwise direction,
843  * Swaps values for xy[1] and xy[2]
844  */
845 static INLINE void
rotate_fixed_position_12(struct fixed_position * position)846 rotate_fixed_position_12( struct fixed_position* position )
847 {
848    int x, y;
849 
850    x = position->x[2];
851    y = position->y[2];
852    position->x[2] = position->x[1];
853    position->y[2] = position->y[1];
854    position->x[1] = x;
855    position->y[1] = y;
856 
857    x = position->dx01;
858    y = position->dy01;
859    position->dx01 = -position->dx20;
860    position->dy01 = -position->dy20;
861    position->dx20 = -x;
862    position->dy20 = -y;
863 
864    position->area = -position->area;
865 }
866 
867 
868 /**
869  * Draw triangle if it's CW, cull otherwise.
870  */
triangle_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4])871 static void triangle_cw( struct lp_setup_context *setup,
872 			 const float (*v0)[4],
873 			 const float (*v1)[4],
874 			 const float (*v2)[4] )
875 {
876    struct fixed_position position;
877    calc_fixed_position(setup, &position, v0, v1, v2);
878 
879    if (position.area < 0) {
880       if (setup->flatshade_first) {
881          rotate_fixed_position_12(&position);
882          retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
883       } else {
884          rotate_fixed_position_01(&position);
885          retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
886       }
887    }
888 }
889 
890 
triangle_ccw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4])891 static void triangle_ccw( struct lp_setup_context *setup,
892                           const float (*v0)[4],
893                           const float (*v1)[4],
894                           const float (*v2)[4])
895 {
896    struct fixed_position position;
897    calc_fixed_position(setup, &position, v0, v1, v2);
898 
899    if (position.area > 0)
900       retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
901 }
902 
903 /**
904  * Draw triangle whether it's CW or CCW.
905  */
triangle_both(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4])906 static void triangle_both( struct lp_setup_context *setup,
907 			   const float (*v0)[4],
908 			   const float (*v1)[4],
909 			   const float (*v2)[4] )
910 {
911    struct fixed_position position;
912    calc_fixed_position(setup, &position, v0, v1, v2);
913 
914    if (0) {
915       assert(!util_is_inf_or_nan(v0[0][0]));
916       assert(!util_is_inf_or_nan(v0[0][1]));
917       assert(!util_is_inf_or_nan(v1[0][0]));
918       assert(!util_is_inf_or_nan(v1[0][1]));
919       assert(!util_is_inf_or_nan(v2[0][0]));
920       assert(!util_is_inf_or_nan(v2[0][1]));
921    }
922 
923    if (position.area > 0)
924       retry_triangle_ccw( setup, &position, v0, v1, v2, setup->ccw_is_frontface );
925    else if (position.area < 0) {
926       if (setup->flatshade_first) {
927          rotate_fixed_position_12( &position );
928          retry_triangle_ccw( setup, &position, v0, v2, v1, !setup->ccw_is_frontface );
929       } else {
930          rotate_fixed_position_01( &position );
931          retry_triangle_ccw( setup, &position, v1, v0, v2, !setup->ccw_is_frontface );
932       }
933    }
934 }
935 
936 
triangle_nop(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4])937 static void triangle_nop( struct lp_setup_context *setup,
938 			  const float (*v0)[4],
939 			  const float (*v1)[4],
940 			  const float (*v2)[4] )
941 {
942 }
943 
944 
945 void
lp_setup_choose_triangle(struct lp_setup_context * setup)946 lp_setup_choose_triangle( struct lp_setup_context *setup )
947 {
948    switch (setup->cullmode) {
949    case PIPE_FACE_NONE:
950       setup->triangle = triangle_both;
951       break;
952    case PIPE_FACE_BACK:
953       setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
954       break;
955    case PIPE_FACE_FRONT:
956       setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
957       break;
958    default:
959       setup->triangle = triangle_nop;
960       break;
961    }
962 }
963