1 
2 /*
3  * Mesa 3-D graphics library
4  *
5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  * OTHER DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Keith Whitwell <keithw@vmware.com>
27  */
28 
29 /* Split indexed primitives with per-vertex copying.
30  */
31 
32 #include <stdio.h>
33 
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 #include "main/imports.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 
41 #include "vbo_split.h"
42 #include "vbo.h"
43 
44 
/* Number of slots in copy_context::vert_cache.  elt() picks a slot with
 * "index & (ELT_TABLE_SIZE-1)", so this has to stay a power of two.
 */
#define ELT_TABLE_SIZE 16

/**
 * Used for vertex-level splitting of indexed buffers.  Note that
 * non-indexed primitives may be converted to indexed in some cases
 * (eg loops, fans) in order to use this splitting path.
 *
 * The first group of fields describes the incoming draw (filled in by
 * vbo_split_copy()); the rest is scratch state built by replay_init(),
 * filled by elt()/begin()/end() and emitted by flush().
 */
struct copy_context {
   struct gl_context *ctx;                 /**< context the draw is issued on */
   const struct gl_vertex_array **array;   /**< incoming arrays, one per attribute */
   const struct _mesa_prim *prim;          /**< first incoming prim of this run */
   GLuint nr_prims;                        /**< number of incoming prims in this run */
   const struct _mesa_index_buffer *ib;    /**< incoming index buffer */
   vbo_draw_func draw;                     /**< callback used by flush() to draw */

   const struct split_limits *limits;      /**< max_verts / max_vb_size / max_indices */

   /** One entry per incoming array with a non-zero StrideB. */
   struct {
      GLuint attr;                         /**< attribute index of this array */
      GLuint size;                         /**< bytes copied per vertex (attr_size()) */
      const struct gl_vertex_array *array; /**< the incoming array */
      const GLubyte *src_ptr;              /**< base address of the source data */

      struct gl_vertex_array dstarray;     /**< replacement array inside dstbuf */

   } varying[VERT_ATTRIB_MAX];
   GLuint nr_varying;                      /**< used entries in varying[] */

   /** Arrays handed to the draw callback: either the incoming array
    * (StrideB == 0) or the matching varying[].dstarray.
    */
   const struct gl_vertex_array *dstarray_ptr[VERT_ATTRIB_MAX];
   struct _mesa_index_buffer dstib;        /**< index buffer wrapping dstelt */

   GLuint *translated_elt_buf;  /**< malloc'ed GLuint copy of 8/16-bit indices,
                                 * NULL when the source indices are 32-bit
                                 */
   const GLuint *srcelt;        /**< source indices, always read as GLuint */

   /** A baby hash table to avoid re-emitting (some) duplicate
    * vertices when splitting indexed primitives.
    */
   struct {
      GLuint in;                /**< source index held in this slot (~0 = empty) */
      GLuint out;               /**< index of that vertex inside dstbuf */
   } vert_cache[ELT_TABLE_SIZE];

   GLuint vertex_size;  /**< bytes per packed output vertex */
   GLubyte *dstbuf;     /**< malloc'ed output vertex buffer */
   GLubyte *dstptr;     /**< dstptr == dstbuf + dstbuf_nr * vertex_size */
   GLuint dstbuf_size;  /**< in vertices */
   GLuint dstbuf_nr;    /**< count of emitted vertices, i.e. one more than the
                         * largest value in dstelt.  flush() passes
                         * dstbuf_nr - 1 as the max index.
                         */

   GLuint *dstelt;      /**< malloc'ed output element list */
   GLuint dstelt_nr;    /**< elements currently queued in dstelt */
   GLuint dstelt_size;  /**< capacity of dstelt, in elements */

#define MAX_PRIM 32
   struct _mesa_prim dstprim[MAX_PRIM];    /**< output prims queued for flush() */
   GLuint dstprim_nr;                      /**< completed entries in dstprim[] */
};
103 
104 
105 static GLuint
attr_size(const struct gl_vertex_array * array)106 attr_size(const struct gl_vertex_array *array)
107 {
108    return array->Size * _mesa_sizeof_type(array->Type);
109 }
110 
111 
112 /**
113  * Starts returning true slightly before the buffer fills, to ensure
114  * that there is sufficient room for any remaining vertices to finish
115  * off the prim:
116  */
117 static GLboolean
check_flush(struct copy_context * copy)118 check_flush(struct copy_context *copy)
119 {
120    GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
121 
122    if (GL_TRIANGLE_STRIP == mode &&
123        copy->dstelt_nr & 1) { /* see bug9962 */
124        return GL_FALSE;
125    }
126 
127    if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
128       return GL_TRUE;
129 
130    if (copy->dstelt_nr + 4 > copy->dstelt_size)
131       return GL_TRUE;
132 
133    return GL_FALSE;
134 }
135 
136 
137 /**
138  * Dump the parameters/info for a vbo->draw() call.
139  */
140 static void
dump_draw_info(struct gl_context * ctx,const struct gl_vertex_array ** arrays,const struct _mesa_prim * prims,GLuint nr_prims,const struct _mesa_index_buffer * ib,GLuint min_index,GLuint max_index)141 dump_draw_info(struct gl_context *ctx,
142                const struct gl_vertex_array **arrays,
143                const struct _mesa_prim *prims,
144                GLuint nr_prims,
145                const struct _mesa_index_buffer *ib,
146                GLuint min_index,
147                GLuint max_index)
148 {
149    GLuint i, j;
150 
151    printf("VBO Draw:\n");
152    for (i = 0; i < nr_prims; i++) {
153       printf("Prim %u of %u\n", i, nr_prims);
154       printf("  Prim mode 0x%x\n", prims[i].mode);
155       printf("  IB: %p\n", (void*) ib);
156       for (j = 0; j < VERT_ATTRIB_MAX; j++) {
157          printf("    array %d at %p:\n", j, (void*) arrays[j]);
158          printf("      ptr %p, size %d, type 0x%x, stride %d\n",
159                 arrays[j]->Ptr,
160                 arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB);
161          if (0) {
162             GLint k = prims[i].start + prims[i].count - 1;
163             GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->StrideB * k);
164             printf("        last: %f %f %f\n",
165                    last[0], last[1], last[2]);
166          }
167       }
168    }
169 }
170 
171 
172 static void
flush(struct copy_context * copy)173 flush(struct copy_context *copy)
174 {
175    struct gl_context *ctx = copy->ctx;
176    const struct gl_vertex_array **saved_arrays = ctx->Array._DrawArrays;
177    GLuint i;
178 
179    /* Set some counters:
180     */
181    copy->dstib.count = copy->dstelt_nr;
182 
183 #if 0
184    dump_draw_info(copy->ctx,
185                   copy->dstarray_ptr,
186                   copy->dstprim,
187                   copy->dstprim_nr,
188                   &copy->dstib,
189                   0,
190                   copy->dstbuf_nr);
191 #else
192    (void) dump_draw_info;
193 #endif
194 
195    ctx->Array._DrawArrays = copy->dstarray_ptr;
196    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
197 
198    copy->draw(ctx,
199               copy->dstprim,
200               copy->dstprim_nr,
201               &copy->dstib,
202               GL_TRUE,
203               0,
204               copy->dstbuf_nr - 1,
205               NULL, 0, NULL);
206 
207    ctx->Array._DrawArrays = saved_arrays;
208    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
209 
210    /* Reset all pointers:
211     */
212    copy->dstprim_nr = 0;
213    copy->dstelt_nr = 0;
214    copy->dstbuf_nr = 0;
215    copy->dstptr = copy->dstbuf;
216 
217    /* Clear the vertex cache:
218     */
219    for (i = 0; i < ELT_TABLE_SIZE; i++)
220       copy->vert_cache[i].in = ~0;
221 }
222 
223 
224 /**
225  * Called at begin of each primitive during replay.
226  */
227 static void
begin(struct copy_context * copy,GLenum mode,GLboolean begin_flag)228 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
229 {
230    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
231 
232    prim->mode = mode;
233    prim->begin = begin_flag;
234    prim->num_instances = 1;
235 }
236 
237 
238 /**
239  * Use a hashtable to attempt to identify recently-emitted vertices
240  * and avoid re-emitting them.
241  */
242 static GLuint
elt(struct copy_context * copy,GLuint elt_idx)243 elt(struct copy_context *copy, GLuint elt_idx)
244 {
245    GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
246    GLuint slot = elt & (ELT_TABLE_SIZE-1);
247 
248    /* Look up the incoming element in the vertex cache.  Re-emit if
249     * necessary.
250     */
251    if (copy->vert_cache[slot].in != elt) {
252       GLubyte *csr = copy->dstptr;
253       GLuint i;
254 
255       for (i = 0; i < copy->nr_varying; i++) {
256          const struct gl_vertex_array *srcarray = copy->varying[i].array;
257          const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
258 
259          memcpy(csr, srcptr, copy->varying[i].size);
260          csr += copy->varying[i].size;
261 
262 #ifdef NAN_CHECK
263          if (srcarray->Type == GL_FLOAT) {
264             GLuint k;
265             GLfloat *f = (GLfloat *) srcptr;
266             for (k = 0; k < srcarray->Size; k++) {
267                assert(!IS_INF_OR_NAN(f[k]));
268                assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
269             }
270          }
271 #endif
272 
273          if (0) {
274             const GLuint *f = (const GLuint *)srcptr;
275             GLuint j;
276             printf("  varying %d: ", i);
277             for (j = 0; j < copy->varying[i].size / 4; j++)
278                printf("%x ", f[j]);
279             printf("\n");
280          }
281       }
282 
283       copy->vert_cache[slot].in = elt;
284       copy->vert_cache[slot].out = copy->dstbuf_nr++;
285       copy->dstptr += copy->vertex_size;
286 
287       assert(csr == copy->dstptr);
288       assert(copy->dstptr == (copy->dstbuf +
289                               copy->dstbuf_nr * copy->vertex_size));
290    }
291 
292    copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
293    return check_flush(copy);
294 }
295 
296 
297 /**
298  * Called at end of each primitive during replay.
299  */
300 static void
end(struct copy_context * copy,GLboolean end_flag)301 end(struct copy_context *copy, GLboolean end_flag)
302 {
303    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
304 
305    prim->end = end_flag;
306    prim->count = copy->dstelt_nr - prim->start;
307 
308    if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
309       flush(copy);
310    }
311 }
312 
313 
314 static void
replay_elts(struct copy_context * copy)315 replay_elts(struct copy_context *copy)
316 {
317    GLuint i, j, k;
318    GLboolean split;
319 
320    for (i = 0; i < copy->nr_prims; i++) {
321       const struct _mesa_prim *prim = &copy->prim[i];
322       const GLuint start = prim->start;
323       GLuint first, incr;
324 
325       switch (prim->mode) {
326       case GL_LINE_LOOP:
327          /* Convert to linestrip and emit the final vertex explicitly,
328           * but only in the resultant strip that requires it.
329           */
330          j = 0;
331          while (j != prim->count) {
332             begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
333 
334             for (split = GL_FALSE; j != prim->count && !split; j++)
335                split = elt(copy, start + j);
336 
337             if (j == prim->count) {
338                /* Done, emit final line.  Split doesn't matter as
339                 * it is always raised a bit early so we can emit
340                 * the last verts if necessary!
341                 */
342                if (prim->end)
343                   (void)elt(copy, start + 0);
344 
345                end(copy, prim->end);
346             }
347             else {
348                /* Wrap
349                 */
350                assert(split);
351                end(copy, 0);
352                j--;
353             }
354          }
355          break;
356 
357       case GL_TRIANGLE_FAN:
358       case GL_POLYGON:
359          j = 2;
360          while (j != prim->count) {
361             begin(copy, prim->mode, prim->begin && j == 0);
362 
363             split = elt(copy, start+0);
364             assert(!split);
365 
366             split = elt(copy, start+j-1);
367             assert(!split);
368 
369             for (; j != prim->count && !split; j++)
370                split = elt(copy, start+j);
371 
372             end(copy, prim->end && j == prim->count);
373 
374             if (j != prim->count) {
375                /* Wrapped the primitive, need to repeat some vertices:
376                 */
377                j -= 1;
378             }
379          }
380          break;
381 
382       default:
383          (void)split_prim_inplace(prim->mode, &first, &incr);
384 
385          j = 0;
386          while (j != prim->count) {
387 
388             begin(copy, prim->mode, prim->begin && j == 0);
389 
390             split = 0;
391             for (k = 0; k < first; k++, j++)
392                split |= elt(copy, start+j);
393 
394             assert(!split);
395 
396             for (; j != prim->count && !split;)
397                for (k = 0; k < incr; k++, j++)
398                   split |= elt(copy, start+j);
399 
400             end(copy, prim->end && j == prim->count);
401 
402             if (j != prim->count) {
403                /* Wrapped the primitive, need to repeat some vertices:
404                 */
405                assert(j > first - incr);
406                j -= (first - incr);
407             }
408          }
409          break;
410       }
411    }
412 
413    if (copy->dstprim_nr)
414       flush(copy);
415 }
416 
417 
418 static void
replay_init(struct copy_context * copy)419 replay_init(struct copy_context *copy)
420 {
421    struct gl_context *ctx = copy->ctx;
422    GLuint i;
423    GLuint offset;
424    const GLvoid *srcptr;
425 
426    /* Make a list of varying attributes and their vbo's.  Also
427     * calculate vertex size.
428     */
429    copy->vertex_size = 0;
430    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
431       struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
432 
433       if (copy->array[i]->StrideB == 0) {
434          copy->dstarray_ptr[i] = copy->array[i];
435       }
436       else {
437          GLuint j = copy->nr_varying++;
438 
439          copy->varying[j].attr = i;
440          copy->varying[j].array = copy->array[i];
441          copy->varying[j].size = attr_size(copy->array[i]);
442          copy->vertex_size += attr_size(copy->array[i]);
443 
444          if (_mesa_is_bufferobj(vbo) &&
445              !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
446             ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
447                                        MAP_INTERNAL);
448 
449          copy->varying[j].src_ptr =
450                ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer,
451                             copy->array[i]->Ptr);
452 
453          copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
454       }
455    }
456 
457    /* There must always be an index buffer.  Currently require the
458     * caller convert non-indexed prims to indexed.  Could alternately
459     * do it internally.
460     */
461    if (_mesa_is_bufferobj(copy->ib->obj) &&
462        !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
463       ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
464                                  copy->ib->obj, MAP_INTERNAL);
465 
466    srcptr = (const GLubyte *)
467             ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
468                          copy->ib->ptr);
469 
470    switch (copy->ib->index_size) {
471    case 1:
472       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
473       copy->srcelt = copy->translated_elt_buf;
474 
475       for (i = 0; i < copy->ib->count; i++)
476          copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
477       break;
478 
479    case 2:
480       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
481       copy->srcelt = copy->translated_elt_buf;
482 
483       for (i = 0; i < copy->ib->count; i++)
484          copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
485       break;
486 
487    case 4:
488       copy->translated_elt_buf = NULL;
489       copy->srcelt = (const GLuint *)srcptr;
490       break;
491    }
492 
493    /* Figure out the maximum allowed vertex buffer size:
494     */
495    if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
496       copy->dstbuf_size = copy->limits->max_verts;
497    }
498    else {
499       copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
500    }
501 
502    /* Allocate an output vertex buffer:
503     *
504     * XXX:  This should be a VBO!
505     */
506    copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
507    copy->dstptr = copy->dstbuf;
508 
509    /* Setup new vertex arrays to point into the output buffer:
510     */
511    for (offset = 0, i = 0; i < copy->nr_varying; i++) {
512       const struct gl_vertex_array *src = copy->varying[i].array;
513       struct gl_vertex_array *dst = &copy->varying[i].dstarray;
514 
515       dst->Size = src->Size;
516       dst->Type = src->Type;
517       dst->Format = GL_RGBA;
518       dst->StrideB = copy->vertex_size;
519       dst->Ptr = copy->dstbuf + offset;
520       dst->Normalized = src->Normalized;
521       dst->Integer = src->Integer;
522       dst->Doubles = src->Doubles;
523       dst->BufferObj = ctx->Shared->NullBufferObj;
524       dst->_ElementSize = src->_ElementSize;
525 
526       offset += copy->varying[i].size;
527    }
528 
529    /* Allocate an output element list:
530     */
531    copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
532    copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
533    copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
534    copy->dstelt_nr = 0;
535 
536    /* Setup the new index buffer to point to the allocated element
537     * list:
538     */
539    copy->dstib.count = 0;        /* duplicates dstelt_nr */
540    copy->dstib.index_size = 4;
541    copy->dstib.obj = ctx->Shared->NullBufferObj;
542    copy->dstib.ptr = copy->dstelt;
543 }
544 
545 
546 /**
547  * Free up everything allocated during split/replay.
548  */
549 static void
replay_finish(struct copy_context * copy)550 replay_finish(struct copy_context *copy)
551 {
552    struct gl_context *ctx = copy->ctx;
553    GLuint i;
554 
555    /* Free our vertex and index buffers */
556    free(copy->translated_elt_buf);
557    free(copy->dstbuf);
558    free(copy->dstelt);
559 
560    /* Unmap VBO's */
561    for (i = 0; i < copy->nr_varying; i++) {
562       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
563       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
564          ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
565    }
566 
567    /* Unmap index buffer */
568    if (_mesa_is_bufferobj(copy->ib->obj) &&
569        _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
570       ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
571    }
572 }
573 
574 
575 /**
576  * Split VBO into smaller pieces, draw the pieces.
577  */
578 void
vbo_split_copy(struct gl_context * ctx,const struct gl_vertex_array * arrays[],const struct _mesa_prim * prim,GLuint nr_prims,const struct _mesa_index_buffer * ib,vbo_draw_func draw,const struct split_limits * limits)579 vbo_split_copy(struct gl_context *ctx,
580                const struct gl_vertex_array *arrays[],
581                const struct _mesa_prim *prim,
582                GLuint nr_prims,
583                const struct _mesa_index_buffer *ib,
584                vbo_draw_func draw,
585                const struct split_limits *limits)
586 {
587    struct copy_context copy;
588    GLuint i, this_nr_prims;
589 
590    for (i = 0; i < nr_prims;) {
591       /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
592        * will rebase the elements to the basevertex, and we'll only
593        * emit strings of prims with the same basevertex in one draw call.
594        */
595       for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
596            this_nr_prims++) {
597          if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
598             break;
599       }
600 
601       memset(&copy, 0, sizeof(copy));
602 
603       /* Require indexed primitives:
604        */
605       assert(ib);
606 
607       copy.ctx = ctx;
608       copy.array = arrays;
609       copy.prim = &prim[i];
610       copy.nr_prims = this_nr_prims;
611       copy.ib = ib;
612       copy.draw = draw;
613       copy.limits = limits;
614 
615       /* Clear the vertex cache:
616        */
617       for (i = 0; i < ELT_TABLE_SIZE; i++)
618          copy.vert_cache[i].in = ~0;
619 
620       replay_init(&copy);
621       replay_elts(&copy);
622       replay_finish(&copy);
623    }
624 }
625