1 /* libs/opengles/primitives.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <math.h>
21 
22 #include "context.h"
23 #include "primitives.h"
24 #include "light.h"
25 #include "matrix.h"
26 #include "vertex.h"
27 #include "fp.h"
28 #include "TextureObjectManager.h"
29 
// External C-linkage routine computing the three 32-bit iterator values.
// compute_iterators_t::iterators0032(int32_t*, ...) forwards to it, passing
// the object as `that`, when building for ARM in non-Thumb mode.
extern "C" void iterators0032(const void* that,
        int32_t* it, int32_t c0, int32_t c1, int32_t c2);
32 
33 namespace android {
34 
35 // ----------------------------------------------------------------------------
36 
// Forward declarations of the file-local helpers used below.

// Primitive entry points installed by ogles_validate_primitives() when the
// vertex array is enabled.
static void primitive_point(ogles_context_t* c, vertex_t* v);
static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
static void primitive_clip_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Do-nothing entry points installed when the vertex array is disabled.
static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
static void primitive_nop_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Returns true when the triangle last passed to c->lerp.initTriangle()
// faces the side selected for culling.
static inline bool cull_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Computes and uploads all enabled gradients (texture, color, depth, fog).
static void lerp_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Texture-coordinate gradients without W.
static void lerp_texcoords(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Texture-coordinate gradients with W (used when GGL_ENABLE_W is set).
static void lerp_texcoords_w(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Sets up the gradients (if any are needed) and rasterizes one triangle.
static void triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Handles a triangle with at least one clip flag set.
static void clip_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Clips a line; primitive_line() drops the line when this returns 0.
static unsigned int clip_line(ogles_context_t* c,
        vertex_t* s, vertex_t* p);
67 
68 // ----------------------------------------------------------------------------
69 #if 0
70 #pragma mark -
71 #endif
72 
lightTriangleDarkSmooth(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)73 static void lightTriangleDarkSmooth(ogles_context_t* c,
74         vertex_t* v0, vertex_t* v1, vertex_t* v2)
75 {
76     if (!(v0->flags & vertex_t::LIT)) {
77         v0->flags |= vertex_t::LIT;
78         const GLvoid* cp = c->arrays.color.element(
79                 v0->index & vertex_cache_t::INDEX_MASK);
80         c->arrays.color.fetch(c, v0->color.v, cp);
81     }
82     if (!(v1->flags & vertex_t::LIT)) {
83         v1->flags |= vertex_t::LIT;
84         const GLvoid* cp = c->arrays.color.element(
85                 v1->index & vertex_cache_t::INDEX_MASK);
86         c->arrays.color.fetch(c, v1->color.v, cp);
87     }
88     if(!(v2->flags & vertex_t::LIT)) {
89         v2->flags |= vertex_t::LIT;
90         const GLvoid* cp = c->arrays.color.element(
91                 v2->index & vertex_cache_t::INDEX_MASK);
92         c->arrays.color.fetch(c, v2->color.v, cp);
93     }
94 }
95 
lightTriangleDarkFlat(ogles_context_t * c,vertex_t *,vertex_t *,vertex_t * v2)96 static void lightTriangleDarkFlat(ogles_context_t* c,
97         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
98 {
99     if (!(v2->flags & vertex_t::LIT)) {
100         v2->flags |= vertex_t::LIT;
101         const GLvoid* cp = c->arrays.color.element(
102                 v2->index & vertex_cache_t::INDEX_MASK);
103         c->arrays.color.fetch(c, v2->color.v, cp);
104     }
105     // configure the rasterizer here, before we clip
106     c->rasterizer.procs.color4xv(c, v2->color.v);
107 }
108 
lightTriangleSmooth(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)109 static void lightTriangleSmooth(ogles_context_t* c,
110         vertex_t* v0, vertex_t* v1, vertex_t* v2)
111 {
112     if (!(v0->flags & vertex_t::LIT))
113         c->lighting.lightVertex(c, v0);
114     if (!(v1->flags & vertex_t::LIT))
115         c->lighting.lightVertex(c, v1);
116     if(!(v2->flags & vertex_t::LIT))
117         c->lighting.lightVertex(c, v2);
118 }
119 
lightTriangleFlat(ogles_context_t * c,vertex_t *,vertex_t *,vertex_t * v2)120 static void lightTriangleFlat(ogles_context_t* c,
121         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
122 {
123     if (!(v2->flags & vertex_t::LIT))
124         c->lighting.lightVertex(c, v2);
125     // configure the rasterizer here, before we clip
126     c->rasterizer.procs.color4xv(c, v2->color.v);
127 }
128 
129 // The fog versions...
130 
131 static inline
lightVertexDarkSmoothFog(ogles_context_t * c,vertex_t * v)132 void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
133 {
134     if (!(v->flags & vertex_t::LIT)) {
135         v->flags |= vertex_t::LIT;
136         v->fog = c->fog.fog(c, v->eye.z);
137         const GLvoid* cp = c->arrays.color.element(
138                 v->index & vertex_cache_t::INDEX_MASK);
139         c->arrays.color.fetch(c, v->color.v, cp);
140     }
141 }
142 static inline
lightVertexDarkFlatFog(ogles_context_t * c,vertex_t * v)143 void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
144 {
145     if (!(v->flags & vertex_t::LIT)) {
146         v->flags |= vertex_t::LIT;
147         v->fog = c->fog.fog(c, v->eye.z);
148     }
149 }
150 static inline
lightVertexSmoothFog(ogles_context_t * c,vertex_t * v)151 void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
152 {
153     if (!(v->flags & vertex_t::LIT)) {
154         v->fog = c->fog.fog(c, v->eye.z);
155         c->lighting.lightVertex(c, v);
156     }
157 }
158 
lightTriangleDarkSmoothFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)159 static void lightTriangleDarkSmoothFog(ogles_context_t* c,
160         vertex_t* v0, vertex_t* v1, vertex_t* v2)
161 {
162     lightVertexDarkSmoothFog(c, v0);
163     lightVertexDarkSmoothFog(c, v1);
164     lightVertexDarkSmoothFog(c, v2);
165 }
166 
lightTriangleDarkFlatFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)167 static void lightTriangleDarkFlatFog(ogles_context_t* c,
168         vertex_t* v0, vertex_t* v1, vertex_t* v2)
169 {
170     lightVertexDarkFlatFog(c, v0);
171     lightVertexDarkFlatFog(c, v1);
172     lightVertexDarkSmoothFog(c, v2);
173     // configure the rasterizer here, before we clip
174     c->rasterizer.procs.color4xv(c, v2->color.v);
175 }
176 
lightTriangleSmoothFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)177 static void lightTriangleSmoothFog(ogles_context_t* c,
178         vertex_t* v0, vertex_t* v1, vertex_t* v2)
179 {
180     lightVertexSmoothFog(c, v0);
181     lightVertexSmoothFog(c, v1);
182     lightVertexSmoothFog(c, v2);
183 }
184 
lightTriangleFlatFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)185 static void lightTriangleFlatFog(ogles_context_t* c,
186         vertex_t* v0, vertex_t* v1, vertex_t* v2)
187 {
188     lightVertexDarkFlatFog(c, v0);
189     lightVertexDarkFlatFog(c, v1);
190     lightVertexSmoothFog(c, v2);
191     // configure the rasterizer here, before we clip
192     c->rasterizer.procs.color4xv(c, v2->color.v);
193 }
194 
195 
196 
// Signature shared by all the lightTriangle* functions above: the context
// followed by the three vertices of the primitive.
typedef void (*light_primitive_t)(ogles_context_t*,
        vertex_t*, vertex_t*, vertex_t*);

// Dispatch table indexed by a 3-bit selector built in
// ogles_validate_primitives(): fog 0x4, light 0x2, smooth 0x1.
// "dark" means lighting is disabled.
static const light_primitive_t lightPrimitive[8] = {
    lightTriangleDarkFlat,          // no fog | dark  | flat
    lightTriangleDarkSmooth,        // no fog | dark  | smooth
    lightTriangleFlat,              // no fog | light | flat
    lightTriangleSmooth,            // no fog | light | smooth
    lightTriangleDarkFlatFog,       // fog    | dark  | flat
    lightTriangleDarkSmoothFog,     // fog    | dark  | smooth
    lightTriangleFlatFog,           // fog    | light | flat
    lightTriangleSmoothFog          // fog    | light | smooth
};
211 
ogles_validate_primitives(ogles_context_t * c)212 void ogles_validate_primitives(ogles_context_t* c)
213 {
214     const uint32_t enables = c->rasterizer.state.enables;
215 
216     // set up the lighting/shading/smoothing/fogging function
217     int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
218     index |= c->lighting.enable ? 0x2 : 0;
219     index |= enables & GGL_ENABLE_FOG ? 0x4 : 0;
220     c->lighting.lightTriangle = lightPrimitive[index];
221 
222     // set up the primitive renderers
223     if (ggl_likely(c->arrays.vertex.enable)) {
224         c->prims.renderPoint    = primitive_point;
225         c->prims.renderLine     = primitive_line;
226         c->prims.renderTriangle = primitive_clip_triangle;
227     } else {
228         c->prims.renderPoint    = primitive_nop_point;
229         c->prims.renderLine     = primitive_nop_line;
230         c->prims.renderTriangle = primitive_nop_triangle;
231     }
232 }
233 
234 // ----------------------------------------------------------------------------
235 
initTriangle(vertex_t const * v0,vertex_t const * v1,vertex_t const * v2)236 void compute_iterators_t::initTriangle(
237         vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
238 {
239     m_dx01 = v1->window.x - v0->window.x;
240     m_dy10 = v0->window.y - v1->window.y;
241     m_dx20 = v0->window.x - v2->window.x;
242     m_dy02 = v2->window.y - v0->window.y;
243     m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
244 }
245 
initLine(vertex_t const * v0,vertex_t const * v1)246 void compute_iterators_t::initLine(
247         vertex_t const* v0, vertex_t const* v1)
248 {
249     m_dx01 = m_dy02 = v1->window.x - v0->window.x;
250     m_dy10 = m_dx20 = v0->window.y - v1->window.y;
251     m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
252 }
253 
// Prepares the interpolation state for the primitive previously set up by
// initTriangle()/initLine():
//  - stores v0's window position as the reference point (m_x0, m_y0);
//  - rescales the four edge deltas by a fixed-point reciprocal of the area
//    and records the matching shift in m_area_scale;
//  - when `enables` contains GGL_ENABLE_TMUS, computes m_scale, which
//    iteratorsScale() subtracts from its available headroom.
void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
{
    m_x0 = v0->window.x;
    m_y0 = v0->window.y;
    // area rounded (TRI_HALF) and shifted down by TRI_FRACTION_BITS
    const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
    const GGLcoord minArea = 2; // cannot be inverted
    // triangles with an area smaller than 1.0 are not smooth-shaded

    // q, s and d stay 0 when the area is below minArea; the deltas are then
    // multiplied by d == 0 below.
    int q=0, s=0, d=0;
    if (abs(area) >= minArea) {
        // Here we do some voodoo magic, to compute a suitable scale
        // factor for deltas/area:

        // First compute the 1/area with full 32-bits precision,
        // gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
        d = gglRecipQNormalized(area, &q);

        // Then compute the minimum left-shift to not overflow the muls
        // below.
        s = 32 - gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));

        // We'll keep 16-bits of precision for deltas/area. So we need
        // to shift everything left an extra 15 bits.
        s += 15;

        // make sure all final shifts are not > 32, because gglMulx
        // can't handle it.
        if (s < q) s = q;
        if (s > 32) {
            // NOTE(review): in this branch s > 32, so the shift count
            // (32-s) is negative, which is undefined behavior for >>.
            // The intended adjustment (and whether m_area_scale below
            // should account for it) needs to be confirmed.
            d >>= 32-s;
            s = 32;
        }
    }

    // deltas scaled by the reciprocal of the area
    m_dx01 = gglMulx(m_dx01, d, s);
    m_dy10 = gglMulx(m_dy10, d, s);
    m_dx20 = gglMulx(m_dx20, d, s);
    m_dy02 = gglMulx(m_dy02, d, s);
    // shift applied by the iterators*() methods when multiplying by the
    // scaled deltas
    m_area_scale = 32 + q - s;
    m_scale = 0;

    if (enables & GGL_ENABLE_TMUS) {
        // A: leading zeros of the scaled deltas, B: of the reference point
        const int A = gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
        const int B = gglClz(abs(m_x0)|abs(m_y0));
        m_scale = max(0, 32 - (A + 16)) +
                  max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
    }
}
302 
iteratorsScale(GGLfixed * it,int32_t c0,int32_t c1,int32_t c2) const303 int compute_iterators_t::iteratorsScale(GGLfixed* it,
304         int32_t c0, int32_t c1, int32_t c2) const
305 {
306     int32_t dc01 = c1 - c0;
307     int32_t dc02 = c2 - c0;
308     const int A = gglClz(abs(c0));
309     const int B = gglClz(abs(dc01)|abs(dc02));
310     const int scale = min(A, B - m_scale) - 2;
311     if (scale >= 0) {
312         c0   <<= scale;
313         dc01 <<= scale;
314         dc02 <<= scale;
315     } else {
316         c0   >>= -scale;
317         dc01 >>= -scale;
318         dc02 >>= -scale;
319     }
320     const int s = m_area_scale;
321     int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
322     int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
323     int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
324             gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
325     it[0] = c;
326     it[1] = dcdx;
327     it[2] = dcdy;
328     return scale;
329 }
330 
iterators1616(GGLfixed * it,GGLfixed c0,GGLfixed c1,GGLfixed c2) const331 void compute_iterators_t::iterators1616(GGLfixed* it,
332         GGLfixed c0, GGLfixed c1, GGLfixed c2) const
333 {
334     const GGLfixed dc01 = c1 - c0;
335     const GGLfixed dc02 = c2 - c0;
336     // 16.16 x 16.16 == 32.32 --> 16.16
337     const int s = m_area_scale;
338     int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
339     int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
340     int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
341             gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
342     it[0] = c;
343     it[1] = dcdx;
344     it[2] = dcdy;
345 }
346 
iterators0032(int64_t * it,int32_t c0,int32_t c1,int32_t c2) const347 void compute_iterators_t::iterators0032(int64_t* it,
348         int32_t c0, int32_t c1, int32_t c2) const
349 {
350     const int s = m_area_scale - 16;
351     int32_t dc01 = (c1 - c0)>>s;
352     int32_t dc02 = (c2 - c0)>>s;
353     // 16.16 x 16.16 == 32.32
354     int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
355     int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
356     it[ 0] = (c0<<16) - ((dcdx*m_x0 + dcdy*m_y0)>>4);
357     it[ 1] = dcdx;
358     it[ 2] = dcdy;
359 }
360 
361 #if defined(__arm__) && !defined(__thumb__)
iterators0032(int32_t * it,int32_t c0,int32_t c1,int32_t c2) const362 inline void compute_iterators_t::iterators0032(int32_t* it,
363         int32_t c0, int32_t c1, int32_t c2) const
364 {
365     ::iterators0032(this, it, c0, c1, c2);
366 }
367 #else
iterators0032(int32_t * it,int32_t c0,int32_t c1,int32_t c2) const368 void compute_iterators_t::iterators0032(int32_t* it,
369         int32_t c0, int32_t c1, int32_t c2) const
370 {
371     int64_t it64[3];
372     iterators0032(it64, c0, c1, c2);
373     it[0] = it64[0];
374     it[1] = it64[1];
375     it[2] = it64[2];
376 }
377 #endif
378 
379 // ----------------------------------------------------------------------------
380 
381 static inline int32_t clampZ(GLfixed z) CONST;
clampZ(GLfixed z)382 int32_t clampZ(GLfixed z) {
383     z = (z & ~(z>>31));
384     if (z >= 0x10000)
385         z = 0xFFFF;
386     return z;
387 }
388 
389 static __attribute__((noinline))
fetch_texcoord_impl(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)390 void fetch_texcoord_impl(ogles_context_t* c,
391         vertex_t* v0, vertex_t* v1, vertex_t* v2)
392 {
393     vertex_t* const vtx[3] = { v0, v1, v2 };
394     array_t const * const texcoordArray = c->arrays.texture;
395 
396     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
397         if (!(c->rasterizer.state.texture[i].enable))
398             continue;
399 
400         for (int j=0 ; j<3 ; j++) {
401             vertex_t* const v = vtx[j];
402             if (v->flags & vertex_t::TT)
403                 continue;
404 
405             // NOTE: here we could compute automatic texgen
406             // such as sphere/cube maps, instead of fetching them
407             // from the textcoord array.
408 
409             vec4_t& coords = v->texture[i];
410             const GLubyte* tp = texcoordArray[i].element(
411                     v->index & vertex_cache_t::INDEX_MASK);
412             texcoordArray[i].fetch(c, coords.v, tp);
413 
414             // transform texture coordinates...
415             coords.Q = 0x10000;
416             const transform_t& tr = c->transforms.texture[i].transform;
417             if (ggl_unlikely(tr.ops)) {
418                 c->arrays.tex_transform[i](&tr, &coords, &coords);
419             }
420 
421             // divide by Q
422             const GGLfixed q = coords.Q;
423             if (ggl_unlikely(q != 0x10000)) {
424                 const int32_t qinv = gglRecip28(q);
425                 coords.S = gglMulx(coords.S, qinv, 28);
426                 coords.T = gglMulx(coords.T, qinv, 28);
427             }
428         }
429     }
430     v0->flags |= vertex_t::TT;
431     v1->flags |= vertex_t::TT;
432     v2->flags |= vertex_t::TT;
433 }
434 
fetch_texcoord(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)435 inline void fetch_texcoord(ogles_context_t* c,
436         vertex_t* v0, vertex_t* v1, vertex_t* v2)
437 {
438     const uint32_t enables = c->rasterizer.state.enables;
439     if (!(enables & GGL_ENABLE_TMUS))
440         return;
441 
442     // Fetch & transform texture coordinates...
443     if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
444         // already done for all three vertices, bail...
445         return;
446     }
447     fetch_texcoord_impl(c, v0, v1, v2);
448 }
449 
450 // ----------------------------------------------------------------------------
451 #if 0
452 #pragma mark -
453 #pragma mark Point
454 #endif
455 
// Point renderer installed by ogles_validate_primitives() when the vertex
// array is disabled: intentionally does nothing.
void primitive_nop_point(ogles_context_t*, vertex_t*) {
}
458 
primitive_point(ogles_context_t * c,vertex_t * v)459 void primitive_point(ogles_context_t* c, vertex_t* v)
460 {
461     // lighting & clamping...
462     const uint32_t enables = c->rasterizer.state.enables;
463 
464     if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
465         if (c->lighting.enable) {
466             c->lighting.lightVertex(c, v);
467         } else {
468             v->flags |= vertex_t::LIT;
469             const GLvoid* cp = c->arrays.color.element(
470                     v->index & vertex_cache_t::INDEX_MASK);
471             c->arrays.color.fetch(c, v->color.v, cp);
472         }
473         if (enables & GGL_ENABLE_FOG) {
474             v->fog = c->fog.fog(c, v->eye.z);
475         }
476     }
477 
478     // XXX: we don't need to do that each-time
479     // if color array and lighting not enabled
480     c->rasterizer.procs.color4xv(c, v->color.v);
481 
482     // XXX: look into ES point-sprite extension
483     if (enables & GGL_ENABLE_TMUS) {
484         fetch_texcoord(c, v,v,v);
485         for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
486             if (!c->rasterizer.state.texture[i].enable)
487                 continue;
488             int32_t itt[8];
489             itt[1] = itt[2] = itt[4] = itt[5] = 0;
490             itt[6] = itt[7] = 16; // XXX: check that
491             if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
492                 int width = c->textures.tmu[i].texture->surface.width;
493                 itt[0] = v->texture[i].S * width;
494                 itt[6] = 0;
495             }
496             if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
497                 int height = c->textures.tmu[i].texture->surface.height;
498                 itt[3] = v->texture[i].T * height;
499                 itt[7] = 0;
500             }
501             c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
502         }
503     }
504 
505     if (enables & GGL_ENABLE_DEPTH_TEST) {
506         int32_t itz[3];
507         itz[0] = clampZ(v->window.z) * 0x00010001;
508         itz[1] = itz[2] = 0;
509         c->rasterizer.procs.zGrad3xv(c, itz);
510     }
511 
512     if (enables & GGL_ENABLE_FOG) {
513         GLfixed itf[3];
514         itf[0] = v->fog;
515         itf[1] = itf[2] = 0;
516         c->rasterizer.procs.fogGrad3xv(c, itf);
517     }
518 
519     // Render our point...
520     c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
521 }
522 
523 // ----------------------------------------------------------------------------
524 #if 0
525 #pragma mark -
526 #pragma mark Line
527 #endif
528 
// Line renderer installed by ogles_validate_primitives() when the vertex
// array is disabled: intentionally does nothing.
void primitive_nop_line(ogles_context_t*, vertex_t*, vertex_t*) {
}
531 
primitive_line(ogles_context_t * c,vertex_t * v0,vertex_t * v1)532 void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
533 {
534     // get texture coordinates
535     fetch_texcoord(c, v0, v1, v1);
536 
537     // light/shade the vertices first (they're copied below)
538     c->lighting.lightTriangle(c, v0, v1, v1);
539 
540     // clip the line if needed
541     if (ggl_unlikely((v0->flags | v1->flags) & vertex_t::CLIP_ALL)) {
542         unsigned int count = clip_line(c, v0, v1);
543         if (ggl_unlikely(count == 0))
544             return;
545     }
546 
547     // compute iterators...
548     const uint32_t enables = c->rasterizer.state.enables;
549     const uint32_t mask =   GGL_ENABLE_TMUS |
550                             GGL_ENABLE_SMOOTH |
551                             GGL_ENABLE_W |
552                             GGL_ENABLE_FOG |
553                             GGL_ENABLE_DEPTH_TEST;
554 
555     if (ggl_unlikely(enables & mask)) {
556         c->lerp.initLine(v0, v1);
557         lerp_triangle(c, v0, v1, v0);
558     }
559 
560     // render our line
561     c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
562 }
563 
564 // ----------------------------------------------------------------------------
565 #if 0
566 #pragma mark -
567 #pragma mark Triangle
568 #endif
569 
// Triangle renderer installed by ogles_validate_primitives() when the
// vertex array is disabled: intentionally does nothing.
void primitive_nop_triangle(ogles_context_t* /*c*/,
        vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/) {
}
573 
primitive_clip_triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)574 void primitive_clip_triangle(ogles_context_t* c,
575         vertex_t* v0, vertex_t* v1, vertex_t* v2)
576 {
577     uint32_t cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
578     if (ggl_likely(!cc)) {
579         // code below must be as optimized as possible, this is the
580         // common code path.
581 
582         // This triangle is not clipped, test if it's culled
583         // unclipped triangle...
584         c->lerp.initTriangle(v0, v1, v2);
585         if (cull_triangle(c, v0, v1, v2))
586             return; // culled!
587 
588         // Fetch all texture coordinates if needed
589         fetch_texcoord(c, v0, v1, v2);
590 
591         // light (or shade) our triangle!
592         c->lighting.lightTriangle(c, v0, v1, v2);
593 
594         triangle(c, v0, v1, v2);
595         return;
596     }
597 
598     // The assumption here is that we're not going to clip very often,
599     // and even more rarely will we clip a triangle that ends up
600     // being culled out. So it's okay to light the vertices here, even though
601     // in a few cases we won't render the triangle (if culled).
602 
603     // Fetch texture coordinates...
604     fetch_texcoord(c, v0, v1, v2);
605 
606     // light (or shade) our triangle!
607     c->lighting.lightTriangle(c, v0, v1, v2);
608 
609     clip_triangle(c, v0, v1, v2);
610 }
611 
612 // -----------------------------------------------------------------------
613 
triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)614 void triangle(ogles_context_t* c,
615         vertex_t* v0, vertex_t* v1, vertex_t* v2)
616 {
617     // compute iterators...
618     const uint32_t enables = c->rasterizer.state.enables;
619     const uint32_t mask =   GGL_ENABLE_TMUS |
620                             GGL_ENABLE_SMOOTH |
621                             GGL_ENABLE_W |
622                             GGL_ENABLE_FOG |
623                             GGL_ENABLE_DEPTH_TEST;
624 
625     if (ggl_likely(enables & mask))
626         lerp_triangle(c, v0, v1, v2);
627 
628     c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
629 }
630 
lerp_triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)631 void lerp_triangle(ogles_context_t* c,
632         vertex_t* v0, vertex_t* v1, vertex_t* v2)
633 {
634     const uint32_t enables = c->rasterizer.state.enables;
635     c->lerp.initLerp(v0, enables);
636 
637     // set up texture iterators
638     if (enables & GGL_ENABLE_TMUS) {
639         if (enables & GGL_ENABLE_W) {
640             lerp_texcoords_w(c, v0, v1, v2);
641         } else {
642             lerp_texcoords(c, v0, v1, v2);
643         }
644     }
645 
646     // set up the color iterators
647     const compute_iterators_t& lerp = c->lerp;
648     if (enables & GGL_ENABLE_SMOOTH) {
649         GLfixed itc[12];
650         for (int i=0 ; i<4 ; i++) {
651             const GGLcolor c0 = v0->color.v[i] * 255;
652             const GGLcolor c1 = v1->color.v[i] * 255;
653             const GGLcolor c2 = v2->color.v[i] * 255;
654             lerp.iterators1616(&itc[i*3], c0, c1, c2);
655         }
656         c->rasterizer.procs.colorGrad12xv(c, itc);
657     }
658 
659     if (enables & GGL_ENABLE_DEPTH_TEST) {
660         int32_t itz[3];
661         const int32_t v0z = clampZ(v0->window.z);
662         const int32_t v1z = clampZ(v1->window.z);
663         const int32_t v2z = clampZ(v2->window.z);
664         if (ggl_unlikely(c->polygonOffset.enable)) {
665             const int32_t units = (c->polygonOffset.units << 16);
666             const GLfixed factor = c->polygonOffset.factor;
667             if (factor) {
668                 int64_t itz64[3];
669                 lerp.iterators0032(itz64, v0z, v1z, v2z);
670                 int64_t maxDepthSlope = max(itz64[1], itz64[2]);
671                 itz[0] = uint32_t(itz64[0])
672                         + uint32_t((maxDepthSlope*factor)>>16) + units;
673                 itz[1] = uint32_t(itz64[1]);
674                 itz[2] = uint32_t(itz64[2]);
675             } else {
676                 lerp.iterators0032(itz, v0z, v1z, v2z);
677                 itz[0] += units;
678             }
679         } else {
680             lerp.iterators0032(itz, v0z, v1z, v2z);
681         }
682         c->rasterizer.procs.zGrad3xv(c, itz);
683     }
684 
685     if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
686         GLfixed itf[3];
687         lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
688         c->rasterizer.procs.fogGrad3xv(c, itf);
689     }
690 }
691 
692 
693 static inline
compute_lod(ogles_context_t * c,int i,int32_t s0,int32_t t0,int32_t s1,int32_t t1,int32_t s2,int32_t t2)694 int compute_lod(ogles_context_t* c, int i,
695         int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
696 {
697     // Compute mipmap level / primitive
698     // rho = sqrt( texelArea / area )
699     // lod = log2( rho )
700     // lod = log2( texelArea / area ) / 2
701     // lod = (log2( texelArea ) - log2( area )) / 2
702     const compute_iterators_t& lerp = c->lerp;
703     const GGLcoord area = abs(lerp.area());
704     const int w = c->textures.tmu[i].texture->surface.width;
705     const int h = c->textures.tmu[i].texture->surface.height;
706     const int shift = 16 + (16 - TRI_FRACTION_BITS);
707     int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
708             gglMulx(s2-s0, t1-t0, shift) )*w*h;
709     int log2TArea = (32-TRI_FRACTION_BITS  -1) - gglClz(texelArea);
710     int log2Area  = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
711     int lod = (log2TArea - log2Area + 1) >> 1;
712     return lod;
713 }
714 
lerp_texcoords(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)715 void lerp_texcoords(ogles_context_t* c,
716         vertex_t* v0, vertex_t* v1, vertex_t* v2)
717 {
718     const compute_iterators_t& lerp = c->lerp;
719     int32_t itt[8] __attribute__((aligned(16)));
720     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
721         const texture_t& tmu = c->rasterizer.state.texture[i];
722         if (!tmu.enable)
723             continue;
724 
725         // compute the jacobians using block floating-point
726         int32_t s0 = v0->texture[i].S;
727         int32_t t0 = v0->texture[i].T;
728         int32_t s1 = v1->texture[i].S;
729         int32_t t1 = v1->texture[i].T;
730         int32_t s2 = v2->texture[i].S;
731         int32_t t2 = v2->texture[i].T;
732 
733         const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
734         if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
735             int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
736             c->rasterizer.procs.bindTextureLod(c, i,
737                     &c->textures.tmu[i].texture->mip(lod));
738         }
739 
740         // premultiply (s,t) when clampling
741         if (tmu.s_wrap == GGL_CLAMP) {
742             const int width = tmu.surface.width;
743             s0 *= width;
744             s1 *= width;
745             s2 *= width;
746         }
747         if (tmu.t_wrap == GGL_CLAMP) {
748             const int height = tmu.surface.height;
749             t0 *= height;
750             t1 *= height;
751             t2 *= height;
752         }
753         itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
754         itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
755         c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
756     }
757 }
758 
lerp_texcoords_w(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)759 void lerp_texcoords_w(ogles_context_t* c,
760         vertex_t* v0, vertex_t* v1, vertex_t* v2)
761 {
762     const compute_iterators_t& lerp = c->lerp;
763     int32_t itt[8] __attribute__((aligned(16)));
764     int32_t itw[3];
765 
766     // compute W's scale to 2.30
767     int32_t w0 = v0->window.w;
768     int32_t w1 = v1->window.w;
769     int32_t w2 = v2->window.w;
770     int wscale = 32 - gglClz(w0|w1|w2);
771 
772     // compute the jacobian using block floating-point
773     int sc = lerp.iteratorsScale(itw, w0, w1, w2);
774     sc +=  wscale - 16;
775     c->rasterizer.procs.wGrad3xv(c, itw);
776 
777     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
778         const texture_t& tmu = c->rasterizer.state.texture[i];
779         if (!tmu.enable)
780             continue;
781 
782         // compute the jacobians using block floating-point
783         int32_t s0 = v0->texture[i].S;
784         int32_t t0 = v0->texture[i].T;
785         int32_t s1 = v1->texture[i].S;
786         int32_t t1 = v1->texture[i].T;
787         int32_t s2 = v2->texture[i].S;
788         int32_t t2 = v2->texture[i].T;
789 
790         const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
791         if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
792             int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
793             c->rasterizer.procs.bindTextureLod(c, i,
794                     &c->textures.tmu[i].texture->mip(lod));
795         }
796 
797         // premultiply (s,t) when clampling
798         if (tmu.s_wrap == GGL_CLAMP) {
799             const int width = tmu.surface.width;
800             s0 *= width;
801             s1 *= width;
802             s2 *= width;
803         }
804         if (tmu.t_wrap == GGL_CLAMP) {
805             const int height = tmu.surface.height;
806             t0 *= height;
807             t1 *= height;
808             t2 *= height;
809         }
810 
811         s0 = gglMulx(s0, w0, wscale);
812         t0 = gglMulx(t0, w0, wscale);
813         s1 = gglMulx(s1, w1, wscale);
814         t1 = gglMulx(t1, w1, wscale);
815         s2 = gglMulx(s2, w2, wscale);
816         t2 = gglMulx(t2, w2, wscale);
817 
818         itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
819         itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
820         c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
821     }
822 }
823 
824 
825 static inline
cull_triangle(ogles_context_t * c,vertex_t *,vertex_t *,vertex_t *)826 bool cull_triangle(ogles_context_t* c, vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/)
827 {
828     if (ggl_likely(c->cull.enable)) {
829         const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
830         const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
831         if (face == c->cull.cullFace)
832             return true; // culled!
833     }
834     return false;
835 }
836 
837 static inline
frustumPlaneDist(int plane,const vec4_t & s)838 GLfixed frustumPlaneDist(int plane, const vec4_t& s)
839 {
840     const GLfixed d = s.v[ plane >> 1 ];
841     return  ((plane & 1) ? (s.w - d) : (s.w + d));
842 }
843 
844 static inline
clipDivide(GLfixed a,GLfixed b)845 int32_t clipDivide(GLfixed a, GLfixed b) {
846     // returns a 4.28 fixed-point
847     return gglMulDivi(1LU<<28, a, b);
848 }
849 
clip_triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)850 void clip_triangle(ogles_context_t* c,
851         vertex_t* v0, vertex_t* v1, vertex_t* v2)
852 {
853     uint32_t all_cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
854 
855     vertex_t *p0, *p1, *p2;
856     const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
857     const int MAX_VERTICES = 3;
858 
859     // Temporary buffer to hold the new vertices. Each plane can add up to
860     // two new vertices (because the polygon is convex).
861     // We need one extra element, to handle an overflow case when
862     // the polygon degenerates into something non convex.
863     vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1];   // ~3KB
864     vertex_t* buf = buffer;
865 
866     // original list of vertices (polygon to clip, in fact this
867     // function works with an arbitrary polygon).
868     vertex_t* in[3] = { v0, v1, v2 };
869 
870     // output lists (we need 2, which we use back and forth)
871     // (maximum outpout list's size is MAX_CLIPPING_PLANES + MAX_VERTICES)
872     // 2 more elements for overflow when non convex polygons.
873     vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
874     unsigned int outi = 0;
875 
876     // current input list
877     vertex_t** ivl = in;
878 
879     // 3 input vertices, 0 in the output list, first plane
880     unsigned int ic = 3;
881 
882     // User clip-planes first, the clipping is always done in eye-coordinate
883     // this is basically the same algorithm than for the view-volume
884     // clipping, except for the computation of the distance (vertex, plane)
885     // and the fact that we need to compute the eye-coordinates of each
886     // new vertex we create.
887 
888     if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
889     {
890         unsigned int plane = 0;
891         uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
892         do {
893             if (cc & 1) {
894                 // pointers to our output list (head and current)
895                 vertex_t** const ovl = &out[outi][0];
896                 vertex_t** output = ovl;
897                 unsigned int oc = 0;
898                 unsigned int sentinel = 0;
899                 // previous vertex, compute distance to the plane
900                 vertex_t* s = ivl[ic-1];
901                 const vec4_t& equation = c->clipPlanes.plane[plane].equation;
902                 GLfixed sd = dot4(equation.v, s->eye.v);
903                 // clip each vertex against this plane...
904                 for (unsigned int i=0 ; i<ic ; i++) {
905                     vertex_t* p = ivl[i];
906                     const GLfixed pd = dot4(equation.v, p->eye.v);
907                     if (sd >= 0) {
908                         if (pd >= 0) {
909                             // both inside
910                             *output++ = p;
911                             oc++;
912                         } else {
913                             // s inside, p outside (exiting)
914                             const GLfixed t = clipDivide(sd, sd-pd);
915                             c->arrays.clipEye(c, buf, t, p, s);
916                             *output++ = buf++;
917                             oc++;
918                             if (++sentinel >= 3)
919                                 return; // non-convex polygon!
920                         }
921                     } else {
922                         if (pd >= 0) {
923                             // s outside (entering)
924                             if (pd) {
925                                 const GLfixed t = clipDivide(pd, pd-sd);
926                                 c->arrays.clipEye(c, buf, t, s, p);
927                                 *output++ = buf++;
928                                 oc++;
929                                 if (++sentinel >= 3)
930                                     return; // non-convex polygon!
931                             }
932                             *output++ = p;
933                             oc++;
934                         } else {
935                            // both outside
936                         }
937                     }
938                     s = p;
939                     sd = pd;
940                 }
941                 // output list become the new input list
942                 if (oc<3)
943                     return; // less than 3 vertices left? we're done!
944                 ivl = ovl;
945                 ic = oc;
946                 outi = 1-outi;
947             }
948             cc >>= 1;
949             plane++;
950         } while (cc);
951     }
952 
953     // frustum clip-planes
954     if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
955     {
956         unsigned int plane = 0;
957         uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
958         do {
959             if (cc & 1) {
960                 // pointers to our output list (head and current)
961                 vertex_t** const ovl = &out[outi][0];
962                 vertex_t** output = ovl;
963                 unsigned int oc = 0;
964                 unsigned int sentinel = 0;
965                 // previous vertex, compute distance to the plane
966                 vertex_t* s = ivl[ic-1];
967                 GLfixed sd = frustumPlaneDist(plane, s->clip);
968                 // clip each vertex against this plane...
969                 for (unsigned int i=0 ; i<ic ; i++) {
970                     vertex_t* p = ivl[i];
971                     const GLfixed pd = frustumPlaneDist(plane, p->clip);
972                     if (sd >= 0) {
973                         if (pd >= 0) {
974                             // both inside
975                             *output++ = p;
976                             oc++;
977                         } else {
978                             // s inside, p outside (exiting)
979                             const GLfixed t = clipDivide(sd, sd-pd);
980                             c->arrays.clipVertex(c, buf, t, p, s);
981                             *output++ = buf++;
982                             oc++;
983                             if (++sentinel >= 3)
984                                 return; // non-convex polygon!
985                         }
986                     } else {
987                         if (pd >= 0) {
988                             // s outside (entering)
989                             if (pd) {
990                                 const GLfixed t = clipDivide(pd, pd-sd);
991                                 c->arrays.clipVertex(c, buf, t, s, p);
992                                 *output++ = buf++;
993                                 oc++;
994                                 if (++sentinel >= 3)
995                                     return; // non-convex polygon!
996                             }
997                             *output++ = p;
998                             oc++;
999                         } else {
1000                            // both outside
1001                         }
1002                     }
1003                     s = p;
1004                     sd = pd;
1005                 }
1006                 // output list become the new input list
1007                 if (oc<3)
1008                     return; // less than 3 vertices left? we're done!
1009                 ivl = ovl;
1010                 ic = oc;
1011                 outi = 1-outi;
1012             }
1013             cc >>= 1;
1014             plane++;
1015         } while (cc);
1016     }
1017 
1018     // finally we can render our triangles...
1019     p0 = ivl[0];
1020     p1 = ivl[1];
1021     for (unsigned int i=2 ; i<ic ; i++) {
1022         p2 = ivl[i];
1023         c->lerp.initTriangle(p0, p1, p2);
1024         if (cull_triangle(c, p0, p1, p2)) {
1025             p1 = p2;
1026             continue; // culled!
1027         }
1028         triangle(c, p0, p1, p2);
1029         p1 = p2;
1030     }
1031 }
1032 
clip_line(ogles_context_t * c,vertex_t * s,vertex_t * p)1033 unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
1034 {
1035     const uint32_t all_cc = (s->flags | p->flags) & vertex_t::CLIP_ALL;
1036 
1037     if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
1038     {
1039         unsigned int plane = 0;
1040         uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
1041         do {
1042             if (cc & 1) {
1043                 const vec4_t& equation = c->clipPlanes.plane[plane].equation;
1044                 const GLfixed sd = dot4(equation.v, s->eye.v);
1045                 const GLfixed pd = dot4(equation.v, p->eye.v);
1046                 if (sd >= 0) {
1047                     if (pd >= 0) {
1048                         // both inside
1049                     } else {
1050                         // s inside, p outside (exiting)
1051                         const GLfixed t = clipDivide(sd, sd-pd);
1052                         c->arrays.clipEye(c, p, t, p, s);
1053                     }
1054                 } else {
1055                     if (pd >= 0) {
1056                         // s outside (entering)
1057                         if (pd) {
1058                             const GLfixed t = clipDivide(pd, pd-sd);
1059                             c->arrays.clipEye(c, s, t, s, p);
1060                         }
1061                     } else {
1062                        // both outside
1063                        return 0;
1064                     }
1065                 }
1066             }
1067             cc >>= 1;
1068             plane++;
1069         } while (cc);
1070     }
1071 
1072     // frustum clip-planes
1073     if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
1074     {
1075         unsigned int plane = 0;
1076         uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
1077         do {
1078             if (cc & 1) {
1079                 const GLfixed sd = frustumPlaneDist(plane, s->clip);
1080                 const GLfixed pd = frustumPlaneDist(plane, p->clip);
1081                 if (sd >= 0) {
1082                     if (pd >= 0) {
1083                         // both inside
1084                     } else {
1085                         // s inside, p outside (exiting)
1086                         const GLfixed t = clipDivide(sd, sd-pd);
1087                         c->arrays.clipVertex(c, p, t, p, s);
1088                     }
1089                 } else {
1090                     if (pd >= 0) {
1091                         // s outside (entering)
1092                         if (pd) {
1093                             const GLfixed t = clipDivide(pd, pd-sd);
1094                             c->arrays.clipVertex(c, s, t, s, p);
1095                         }
1096                     } else {
1097                        // both outside
1098                        return 0;
1099                     }
1100                 }
1101             }
1102             cc >>= 1;
1103             plane++;
1104         } while (cc);
1105     }
1106 
1107     return 2;
1108 }
1109 
1110 
1111 }; // namespace android
1112