1 
2 /*
3  * Mesa 3-D graphics library
4  *
5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  * OTHER DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Keith Whitwell <keithw@vmware.com>
27  */
28 
29 /* Split indexed primitives with per-vertex copying.
30  */
31 
32 #include <stdio.h>
33 
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 #include "main/imports.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 
41 #include "vbo_split.h"
42 #include "vbo.h"
43 
44 
/* Number of slots in copy_context::vert_cache.  Must remain a power of
 * two: elt() picks a slot with "index & (ELT_TABLE_SIZE-1)".
 */
#define ELT_TABLE_SIZE 16

/**
 * Used for vertex-level splitting of indexed buffers.  Note that
 * non-indexed primitives may be converted to indexed in some cases
 * (eg loops, fans) in order to use this splitting path.
 *
 * One instance lives on the stack of vbo_split_copy() for each run of
 * source primitives; it is zero-filled before use, so every counter
 * below starts at 0.
 */
struct copy_context {

   /* Inputs, copied straight from the vbo_split_copy() arguments: */
   struct gl_context *ctx;
   const struct gl_vertex_array **array;  /**< source arrays, VERT_ATTRIB_MAX entries */
   const struct _mesa_prim *prim;         /**< first source prim of this run */
   GLuint nr_prims;                       /**< number of source prims in this run */
   const struct _mesa_index_buffer *ib;   /**< source index buffer */
   vbo_draw_func draw;                    /**< callback invoked by flush() */

   const struct split_limits *limits;     /**< max_verts / max_vb_size / max_indices */

   /* One entry per source array with a non-zero StrideB (filled in by
    * replay_init()):
    */
   struct {
      GLuint attr;                        /**< index of the source array */
      GLuint size;                        /**< bytes copied per vertex (attr_size()) */
      const struct gl_vertex_array *array; /**< the source array itself */
      const GLubyte *src_ptr;             /**< buffer mapping pointer + array->Ptr */

      struct gl_vertex_array dstarray;    /**< replacement array pointing into dstbuf */

   } varying[VERT_ATTRIB_MAX];
   GLuint nr_varying;                     /**< number of varying[] entries in use */

   /* Arrays installed as ctx->Array._DrawArrays while drawing.  Zero-stride
    * source arrays are passed through; the others point at varying[].dstarray.
    */
   const struct gl_vertex_array *dstarray_ptr[VERT_ATTRIB_MAX];
   struct _mesa_index_buffer dstib;       /**< output index buffer, backed by dstelt */

   GLuint *translated_elt_buf;            /**< malloc'ed GLuint copy of ubyte/ushort
                                           * indices; NULL for GL_UNSIGNED_INT */
   const GLuint *srcelt;                  /**< source indices, always read as GLuint */

   /** A baby hash table to avoid re-emitting (some) duplicate
    * vertices when splitting indexed primitives.
    *
    * 'in' is the source index (basevertex already added), 'out' the
    * index of the copied vertex in dstbuf.  Slots are reset by storing
    * ~0 in 'in'.
    */
   struct {
      GLuint in;
      GLuint out;
   } vert_cache[ELT_TABLE_SIZE];

   GLuint vertex_size;  /**< bytes per output vertex: sum of varying[].size */
   GLubyte *dstbuf;     /**< malloc'ed output vertex storage */
   GLubyte *dstptr;     /**< dstptr == dstbuf + dstbuf_nr * vertex_size */
   GLuint dstbuf_size;  /**< in vertices */
   GLuint dstbuf_nr;    /**< count of emitted vertices; flush() passes
                         * dstbuf_nr - 1 to the draw callback as the
                         * maximum index.
                         */

   GLuint *dstelt;      /**< malloc'ed output index list */
   GLuint dstelt_nr;    /**< indices written to dstelt so far */
   GLuint dstelt_size;  /**< capacity of dstelt, in indices */

#define MAX_PRIM 32
   struct _mesa_prim dstprim[MAX_PRIM];  /**< output prims awaiting flush() */
   GLuint dstprim_nr;                    /**< number of finished entries in dstprim */

};
105 
106 
attr_size(const struct gl_vertex_array * array)107 static GLuint attr_size( const struct gl_vertex_array *array )
108 {
109    return array->Size * _mesa_sizeof_type(array->Type);
110 }
111 
112 
113 /**
114  * Starts returning true slightly before the buffer fills, to ensure
115  * that there is sufficient room for any remaining vertices to finish
116  * off the prim:
117  */
118 static GLboolean
check_flush(struct copy_context * copy)119 check_flush( struct copy_context *copy )
120 {
121    GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
122 
123    if (GL_TRIANGLE_STRIP == mode &&
124        copy->dstelt_nr & 1) { /* see bug9962 */
125        return GL_FALSE;
126    }
127 
128    if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
129       return GL_TRUE;
130 
131    if (copy->dstelt_nr + 4 > copy->dstelt_size)
132       return GL_TRUE;
133 
134    return GL_FALSE;
135 }
136 
137 
138 /**
139  * Dump the parameters/info for a vbo->draw() call.
140  */
141 static void
dump_draw_info(struct gl_context * ctx,const struct gl_vertex_array ** arrays,const struct _mesa_prim * prims,GLuint nr_prims,const struct _mesa_index_buffer * ib,GLuint min_index,GLuint max_index)142 dump_draw_info(struct gl_context *ctx,
143                const struct gl_vertex_array **arrays,
144                const struct _mesa_prim *prims,
145                GLuint nr_prims,
146                const struct _mesa_index_buffer *ib,
147                GLuint min_index,
148                GLuint max_index)
149 {
150    GLuint i, j;
151 
152    printf("VBO Draw:\n");
153    for (i = 0; i < nr_prims; i++) {
154       printf("Prim %u of %u\n", i, nr_prims);
155       printf("  Prim mode 0x%x\n", prims[i].mode);
156       printf("  IB: %p\n", (void*) ib);
157       for (j = 0; j < VERT_ATTRIB_MAX; j++) {
158          printf("    array %d at %p:\n", j, (void*) arrays[j]);
159          printf("      ptr %p, size %d, type 0x%x, stride %d\n",
160 		arrays[j]->Ptr,
161 		arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB);
162          if (0) {
163             GLint k = prims[i].start + prims[i].count - 1;
164             GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->StrideB * k);
165             printf("        last: %f %f %f\n",
166 		   last[0], last[1], last[2]);
167          }
168       }
169    }
170 }
171 
172 
173 static void
flush(struct copy_context * copy)174 flush( struct copy_context *copy )
175 {
176    struct gl_context *ctx = copy->ctx;
177    const struct gl_vertex_array **saved_arrays = ctx->Array._DrawArrays;
178    GLuint i;
179 
180    /* Set some counters:
181     */
182    copy->dstib.count = copy->dstelt_nr;
183 
184 #if 0
185    dump_draw_info(copy->ctx,
186                   copy->dstarray_ptr,
187                   copy->dstprim,
188                   copy->dstprim_nr,
189                   &copy->dstib,
190                   0,
191                   copy->dstbuf_nr);
192 #else
193    (void) dump_draw_info;
194 #endif
195 
196    ctx->Array._DrawArrays = copy->dstarray_ptr;
197    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
198 
199    copy->draw( ctx,
200 	       copy->dstprim,
201 	       copy->dstprim_nr,
202 	       &copy->dstib,
203 	       GL_TRUE,
204 	       0,
205 	       copy->dstbuf_nr - 1,
206 	       NULL, 0, NULL );
207 
208    ctx->Array._DrawArrays = saved_arrays;
209    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
210 
211    /* Reset all pointers:
212     */
213    copy->dstprim_nr = 0;
214    copy->dstelt_nr = 0;
215    copy->dstbuf_nr = 0;
216    copy->dstptr = copy->dstbuf;
217 
218    /* Clear the vertex cache:
219     */
220    for (i = 0; i < ELT_TABLE_SIZE; i++)
221       copy->vert_cache[i].in = ~0;
222 }
223 
224 
225 /**
226  * Called at begin of each primitive during replay.
227  */
228 static void
begin(struct copy_context * copy,GLenum mode,GLboolean begin_flag)229 begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
230 {
231    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
232 
233    prim->mode = mode;
234    prim->begin = begin_flag;
235    prim->num_instances = 1;
236 }
237 
238 
239 /**
240  * Use a hashtable to attempt to identify recently-emitted vertices
241  * and avoid re-emitting them.
242  */
243 static GLuint
elt(struct copy_context * copy,GLuint elt_idx)244 elt(struct copy_context *copy, GLuint elt_idx)
245 {
246    GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
247    GLuint slot = elt & (ELT_TABLE_SIZE-1);
248 
249 /*    printf("elt %d\n", elt); */
250 
251    /* Look up the incoming element in the vertex cache.  Re-emit if
252     * necessary.
253     */
254    if (copy->vert_cache[slot].in != elt) {
255       GLubyte *csr = copy->dstptr;
256       GLuint i;
257 
258 /*       printf("  --> emit to dstelt %d\n", copy->dstbuf_nr); */
259 
260       for (i = 0; i < copy->nr_varying; i++) {
261 	 const struct gl_vertex_array *srcarray = copy->varying[i].array;
262 	 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
263 
264 	 memcpy(csr, srcptr, copy->varying[i].size);
265 	 csr += copy->varying[i].size;
266 
267 #ifdef NAN_CHECK
268          if (srcarray->Type == GL_FLOAT) {
269             GLuint k;
270             GLfloat *f = (GLfloat *) srcptr;
271             for (k = 0; k < srcarray->Size; k++) {
272                assert(!IS_INF_OR_NAN(f[k]));
273                assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
274             }
275          }
276 #endif
277 
278 	 if (0)
279 	 {
280 	    const GLuint *f = (const GLuint *)srcptr;
281 	    GLuint j;
282 	    printf("  varying %d: ", i);
283 	    for(j = 0; j < copy->varying[i].size / 4; j++)
284 	       printf("%x ", f[j]);
285 	    printf("\n");
286 	 }
287       }
288 
289       copy->vert_cache[slot].in = elt;
290       copy->vert_cache[slot].out = copy->dstbuf_nr++;
291       copy->dstptr += copy->vertex_size;
292 
293       assert(csr == copy->dstptr);
294       assert(copy->dstptr == (copy->dstbuf +
295                               copy->dstbuf_nr * copy->vertex_size));
296    }
297 /*    else */
298 /*       printf("  --> reuse vertex\n"); */
299 
300 /*    printf("  --> emit %d\n", copy->vert_cache[slot].out); */
301    copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
302    return check_flush(copy);
303 }
304 
305 
306 /**
307  * Called at end of each primitive during replay.
308  */
309 static void
end(struct copy_context * copy,GLboolean end_flag)310 end( struct copy_context *copy, GLboolean end_flag )
311 {
312    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
313 
314 /*    printf("end (%d)\n", end_flag); */
315 
316    prim->end = end_flag;
317    prim->count = copy->dstelt_nr - prim->start;
318 
319    if (++copy->dstprim_nr == MAX_PRIM ||
320        check_flush(copy))
321       flush(copy);
322 }
323 
324 
/**
 * Walk all source primitives of this run and re-emit them through
 * begin()/elt()/end(), starting a new output primitive whenever elt()
 * reports that the output buffers are nearly full.
 *
 * Line loops, triangle fans and polygons get dedicated handling because
 * a split piece needs vertices repeated that are not simply the last
 * few emitted; everything else is driven by the first/incr values from
 * split_prim_inplace().  Any prims still pending at the end are flushed.
 *
 * NOTE(review): every inner "while" terminates on j == prim->count.  A
 * count the stepping cannot land on exactly (e.g. a fan with fewer than
 * two vertices, or a count that is not first + n*incr) would run past
 * the end of the source indices - presumably the caller has already
 * trimmed the counts; verify.
 */
static void
replay_elts( struct copy_context *copy )
{
   GLuint i, j, k;
   GLboolean split;

   for (i = 0; i < copy->nr_prims; i++) {
      const struct _mesa_prim *prim = &copy->prim[i];
      const GLuint start = prim->start;
      GLuint first, incr;

      switch (prim->mode) {

      case GL_LINE_LOOP:
	 /* Convert to linestrip and emit the final vertex explicitly,
	  * but only in the resultant strip that requires it.
	  */
	 j = 0;
	 while (j != prim->count) {
	    begin(copy, GL_LINE_STRIP, prim->begin && j == 0);

	    /* Emit vertices until the source runs out or a split is requested. */
	    for (split = GL_FALSE; j != prim->count && !split; j++)
	       split = elt(copy, start + j);

	    if (j == prim->count) {
	       /* Done, emit final line.  Split doesn't matter as
		* it is always raised a bit early so we can emit
		* the last verts if necessary!
		*/
	       if (prim->end)
		  (void)elt(copy, start + 0);

	       end(copy, prim->end);
	    }
	    else {
	       /* Wrap
		*/
	       assert(split);
	       end(copy, 0);
	       /* Step back one so the next strip starts with the vertex
		* this one ended on.
		*/
	       j--;
	    }
	 }
	 break;

      case GL_TRIANGLE_FAN:
      case GL_POLYGON:
	 /* Every output piece starts with the hub vertex (start+0)
	  * followed by the previous rim vertex (start+j-1).
	  */
	 j = 2;
	 while (j != prim->count) {
	    /* NOTE(review): j starts at 2 in this branch, so "j == 0" can
	     * never hold and the begin flag passed here is always false -
	     * confirm whether "j == 2" was intended.
	     */
	    begin(copy, prim->mode, prim->begin && j == 0);

	    split = elt(copy, start+0);
	    assert(!split);

	    split = elt(copy, start+j-1);
	    assert(!split);

	    for (; j != prim->count && !split; j++)
	       split = elt(copy, start+j);

	    end(copy, prim->end && j == prim->count);

	    if (j != prim->count) {
	       /* Wrapped the primitive, need to repeat some vertices:
		*/
	       j -= 1;
	    }
	 }
	 break;

      default:
	 /* Only first/incr are needed here; the return value is ignored. */
	 (void)split_prim_inplace(prim->mode, &first, &incr);

	 j = 0;
	 while (j != prim->count) {

	    begin(copy, prim->mode, prim->begin && j == 0);

	    /* Emit the first 'first' vertices of this piece; a split
	     * request is not expected this early.
	     */
	    split = 0;
	    for (k = 0; k < first; k++, j++)
	       split |= elt(copy, start+j);

	    assert(!split);

	    /* Then continue 'incr' vertices at a time, so a split is only
	     * acted upon after a whole group has been emitted.
	     */
	    for (; j != prim->count && !split; )
	       for (k = 0; k < incr; k++, j++)
		  split |= elt(copy, start+j);

	    end(copy, prim->end && j == prim->count);

	    if (j != prim->count) {
	       /* Wrapped the primitive, need to repeat some vertices:
		*/
	       assert(j > first - incr);
	       j -= (first - incr);
	    }
	 }
	 break;
      }
   }

   /* Submit whatever is still pending. */
   if (copy->dstprim_nr)
      flush(copy);
}
428 
429 
430 static void
replay_init(struct copy_context * copy)431 replay_init( struct copy_context *copy )
432 {
433    struct gl_context *ctx = copy->ctx;
434    GLuint i;
435    GLuint offset;
436    const GLvoid *srcptr;
437 
438    /* Make a list of varying attributes and their vbo's.  Also
439     * calculate vertex size.
440     */
441    copy->vertex_size = 0;
442    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
443       struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
444 
445       if (copy->array[i]->StrideB == 0) {
446 	 copy->dstarray_ptr[i] = copy->array[i];
447       }
448       else {
449 	 GLuint j = copy->nr_varying++;
450 
451 	 copy->varying[j].attr = i;
452 	 copy->varying[j].array = copy->array[i];
453 	 copy->varying[j].size = attr_size(copy->array[i]);
454 	 copy->vertex_size += attr_size(copy->array[i]);
455 
456 	 if (_mesa_is_bufferobj(vbo) &&
457              !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
458 	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
459                                        MAP_INTERNAL);
460 
461 	 copy->varying[j].src_ptr =
462                ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer,
463                             copy->array[i]->Ptr);
464 
465 	 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
466       }
467    }
468 
469    /* There must always be an index buffer.  Currently require the
470     * caller convert non-indexed prims to indexed.  Could alternately
471     * do it internally.
472     */
473    if (_mesa_is_bufferobj(copy->ib->obj) &&
474        !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
475       ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
476 				 copy->ib->obj, MAP_INTERNAL);
477 
478    srcptr = (const GLubyte *)
479             ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
480                          copy->ib->ptr);
481 
482    switch (copy->ib->type) {
483    case GL_UNSIGNED_BYTE:
484       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
485       copy->srcelt = copy->translated_elt_buf;
486 
487       for (i = 0; i < copy->ib->count; i++)
488 	 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
489       break;
490 
491    case GL_UNSIGNED_SHORT:
492       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
493       copy->srcelt = copy->translated_elt_buf;
494 
495       for (i = 0; i < copy->ib->count; i++)
496 	 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
497       break;
498 
499    case GL_UNSIGNED_INT:
500       copy->translated_elt_buf = NULL;
501       copy->srcelt = (const GLuint *)srcptr;
502       break;
503    }
504 
505    /* Figure out the maximum allowed vertex buffer size:
506     */
507    if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
508       copy->dstbuf_size = copy->limits->max_verts;
509    }
510    else {
511       copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
512    }
513 
514    /* Allocate an output vertex buffer:
515     *
516     * XXX:  This should be a VBO!
517     */
518    copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
519    copy->dstptr = copy->dstbuf;
520 
521    /* Setup new vertex arrays to point into the output buffer:
522     */
523    for (offset = 0, i = 0; i < copy->nr_varying; i++) {
524       const struct gl_vertex_array *src = copy->varying[i].array;
525       struct gl_vertex_array *dst = &copy->varying[i].dstarray;
526 
527       dst->Size = src->Size;
528       dst->Type = src->Type;
529       dst->Format = GL_RGBA;
530       dst->StrideB = copy->vertex_size;
531       dst->Ptr = copy->dstbuf + offset;
532       dst->Normalized = src->Normalized;
533       dst->Integer = src->Integer;
534       dst->Doubles = src->Doubles;
535       dst->BufferObj = ctx->Shared->NullBufferObj;
536       dst->_ElementSize = src->_ElementSize;
537 
538       offset += copy->varying[i].size;
539    }
540 
541    /* Allocate an output element list:
542     */
543    copy->dstelt_size = MIN2(65536,
544 			    copy->ib->count * 2 + 3);
545    copy->dstelt_size = MIN2(copy->dstelt_size,
546 			    copy->limits->max_indices);
547    copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
548    copy->dstelt_nr = 0;
549 
550    /* Setup the new index buffer to point to the allocated element
551     * list:
552     */
553    copy->dstib.count = 0;	/* duplicates dstelt_nr */
554    copy->dstib.type = GL_UNSIGNED_INT;
555    copy->dstib.obj = ctx->Shared->NullBufferObj;
556    copy->dstib.ptr = copy->dstelt;
557 }
558 
559 
560 /**
561  * Free up everything allocated during split/replay.
562  */
563 static void
replay_finish(struct copy_context * copy)564 replay_finish( struct copy_context *copy )
565 {
566    struct gl_context *ctx = copy->ctx;
567    GLuint i;
568 
569    /* Free our vertex and index buffers:
570     */
571    free(copy->translated_elt_buf);
572    free(copy->dstbuf);
573    free(copy->dstelt);
574 
575    /* Unmap VBO's
576     */
577    for (i = 0; i < copy->nr_varying; i++) {
578       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
579       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
580 	 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
581    }
582 
583    /* Unmap index buffer:
584     */
585    if (_mesa_is_bufferobj(copy->ib->obj) &&
586        _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
587       ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
588    }
589 }
590 
591 
592 /**
593  * Split VBO into smaller pieces, draw the pieces.
594  */
vbo_split_copy(struct gl_context * ctx,const struct gl_vertex_array * arrays[],const struct _mesa_prim * prim,GLuint nr_prims,const struct _mesa_index_buffer * ib,vbo_draw_func draw,const struct split_limits * limits)595 void vbo_split_copy( struct gl_context *ctx,
596 		     const struct gl_vertex_array *arrays[],
597 		     const struct _mesa_prim *prim,
598 		     GLuint nr_prims,
599 		     const struct _mesa_index_buffer *ib,
600 		     vbo_draw_func draw,
601 		     const struct split_limits *limits )
602 {
603    struct copy_context copy;
604    GLuint i, this_nr_prims;
605 
606    for (i = 0; i < nr_prims;) {
607       /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
608        * will rebase the elements to the basevertex, and we'll only
609        * emit strings of prims with the same basevertex in one draw call.
610        */
611       for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
612 	   this_nr_prims++) {
613 	 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
614 	    break;
615       }
616 
617       memset(&copy, 0, sizeof(copy));
618 
619       /* Require indexed primitives:
620        */
621       assert(ib);
622 
623       copy.ctx = ctx;
624       copy.array = arrays;
625       copy.prim = &prim[i];
626       copy.nr_prims = this_nr_prims;
627       copy.ib = ib;
628       copy.draw = draw;
629       copy.limits = limits;
630 
631       /* Clear the vertex cache:
632        */
633       for (i = 0; i < ELT_TABLE_SIZE; i++)
634 	 copy.vert_cache[i].in = ~0;
635 
636       replay_init(&copy);
637       replay_elts(&copy);
638       replay_finish(&copy);
639    }
640 }
641