1 /*
2  * Copyright 2003 VMware, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 #include "main/arrayobj.h"
27 #include "main/bufferobj.h"
28 #include "main/context.h"
29 #include "main/enums.h"
30 #include "main/macros.h"
31 #include "main/glformats.h"
32 #include "nir.h"
33 
34 #include "brw_draw.h"
35 #include "brw_defines.h"
36 #include "brw_context.h"
37 #include "brw_state.h"
38 
39 #include "intel_batchbuffer.h"
40 #include "intel_buffer_objects.h"
41 
42 static const GLuint double_types_float[5] = {
43    0,
44    ISL_FORMAT_R64_FLOAT,
45    ISL_FORMAT_R64G64_FLOAT,
46    ISL_FORMAT_R64G64B64_FLOAT,
47    ISL_FORMAT_R64G64B64A64_FLOAT
48 };
49 
50 static const GLuint double_types_passthru[5] = {
51    0,
52    ISL_FORMAT_R64_PASSTHRU,
53    ISL_FORMAT_R64G64_PASSTHRU,
54    ISL_FORMAT_R64G64B64_PASSTHRU,
55    ISL_FORMAT_R64G64B64A64_PASSTHRU
56 };
57 
58 static const GLuint float_types[5] = {
59    0,
60    ISL_FORMAT_R32_FLOAT,
61    ISL_FORMAT_R32G32_FLOAT,
62    ISL_FORMAT_R32G32B32_FLOAT,
63    ISL_FORMAT_R32G32B32A32_FLOAT
64 };
65 
66 static const GLuint half_float_types[5] = {
67    0,
68    ISL_FORMAT_R16_FLOAT,
69    ISL_FORMAT_R16G16_FLOAT,
70    ISL_FORMAT_R16G16B16_FLOAT,
71    ISL_FORMAT_R16G16B16A16_FLOAT
72 };
73 
74 static const GLuint fixed_point_types[5] = {
75    0,
76    ISL_FORMAT_R32_SFIXED,
77    ISL_FORMAT_R32G32_SFIXED,
78    ISL_FORMAT_R32G32B32_SFIXED,
79    ISL_FORMAT_R32G32B32A32_SFIXED,
80 };
81 
82 static const GLuint uint_types_direct[5] = {
83    0,
84    ISL_FORMAT_R32_UINT,
85    ISL_FORMAT_R32G32_UINT,
86    ISL_FORMAT_R32G32B32_UINT,
87    ISL_FORMAT_R32G32B32A32_UINT
88 };
89 
90 static const GLuint uint_types_norm[5] = {
91    0,
92    ISL_FORMAT_R32_UNORM,
93    ISL_FORMAT_R32G32_UNORM,
94    ISL_FORMAT_R32G32B32_UNORM,
95    ISL_FORMAT_R32G32B32A32_UNORM
96 };
97 
98 static const GLuint uint_types_scale[5] = {
99    0,
100    ISL_FORMAT_R32_USCALED,
101    ISL_FORMAT_R32G32_USCALED,
102    ISL_FORMAT_R32G32B32_USCALED,
103    ISL_FORMAT_R32G32B32A32_USCALED
104 };
105 
106 static const GLuint int_types_direct[5] = {
107    0,
108    ISL_FORMAT_R32_SINT,
109    ISL_FORMAT_R32G32_SINT,
110    ISL_FORMAT_R32G32B32_SINT,
111    ISL_FORMAT_R32G32B32A32_SINT
112 };
113 
114 static const GLuint int_types_norm[5] = {
115    0,
116    ISL_FORMAT_R32_SNORM,
117    ISL_FORMAT_R32G32_SNORM,
118    ISL_FORMAT_R32G32B32_SNORM,
119    ISL_FORMAT_R32G32B32A32_SNORM
120 };
121 
122 static const GLuint int_types_scale[5] = {
123    0,
124    ISL_FORMAT_R32_SSCALED,
125    ISL_FORMAT_R32G32_SSCALED,
126    ISL_FORMAT_R32G32B32_SSCALED,
127    ISL_FORMAT_R32G32B32A32_SSCALED
128 };
129 
130 static const GLuint ushort_types_direct[5] = {
131    0,
132    ISL_FORMAT_R16_UINT,
133    ISL_FORMAT_R16G16_UINT,
134    ISL_FORMAT_R16G16B16_UINT,
135    ISL_FORMAT_R16G16B16A16_UINT
136 };
137 
138 static const GLuint ushort_types_norm[5] = {
139    0,
140    ISL_FORMAT_R16_UNORM,
141    ISL_FORMAT_R16G16_UNORM,
142    ISL_FORMAT_R16G16B16_UNORM,
143    ISL_FORMAT_R16G16B16A16_UNORM
144 };
145 
146 static const GLuint ushort_types_scale[5] = {
147    0,
148    ISL_FORMAT_R16_USCALED,
149    ISL_FORMAT_R16G16_USCALED,
150    ISL_FORMAT_R16G16B16_USCALED,
151    ISL_FORMAT_R16G16B16A16_USCALED
152 };
153 
154 static const GLuint short_types_direct[5] = {
155    0,
156    ISL_FORMAT_R16_SINT,
157    ISL_FORMAT_R16G16_SINT,
158    ISL_FORMAT_R16G16B16_SINT,
159    ISL_FORMAT_R16G16B16A16_SINT
160 };
161 
162 static const GLuint short_types_norm[5] = {
163    0,
164    ISL_FORMAT_R16_SNORM,
165    ISL_FORMAT_R16G16_SNORM,
166    ISL_FORMAT_R16G16B16_SNORM,
167    ISL_FORMAT_R16G16B16A16_SNORM
168 };
169 
170 static const GLuint short_types_scale[5] = {
171    0,
172    ISL_FORMAT_R16_SSCALED,
173    ISL_FORMAT_R16G16_SSCALED,
174    ISL_FORMAT_R16G16B16_SSCALED,
175    ISL_FORMAT_R16G16B16A16_SSCALED
176 };
177 
178 static const GLuint ubyte_types_direct[5] = {
179    0,
180    ISL_FORMAT_R8_UINT,
181    ISL_FORMAT_R8G8_UINT,
182    ISL_FORMAT_R8G8B8_UINT,
183    ISL_FORMAT_R8G8B8A8_UINT
184 };
185 
186 static const GLuint ubyte_types_norm[5] = {
187    0,
188    ISL_FORMAT_R8_UNORM,
189    ISL_FORMAT_R8G8_UNORM,
190    ISL_FORMAT_R8G8B8_UNORM,
191    ISL_FORMAT_R8G8B8A8_UNORM
192 };
193 
194 static const GLuint ubyte_types_scale[5] = {
195    0,
196    ISL_FORMAT_R8_USCALED,
197    ISL_FORMAT_R8G8_USCALED,
198    ISL_FORMAT_R8G8B8_USCALED,
199    ISL_FORMAT_R8G8B8A8_USCALED
200 };
201 
202 static const GLuint byte_types_direct[5] = {
203    0,
204    ISL_FORMAT_R8_SINT,
205    ISL_FORMAT_R8G8_SINT,
206    ISL_FORMAT_R8G8B8_SINT,
207    ISL_FORMAT_R8G8B8A8_SINT
208 };
209 
210 static const GLuint byte_types_norm[5] = {
211    0,
212    ISL_FORMAT_R8_SNORM,
213    ISL_FORMAT_R8G8_SNORM,
214    ISL_FORMAT_R8G8B8_SNORM,
215    ISL_FORMAT_R8G8B8A8_SNORM
216 };
217 
218 static const GLuint byte_types_scale[5] = {
219    0,
220    ISL_FORMAT_R8_SSCALED,
221    ISL_FORMAT_R8G8_SSCALED,
222    ISL_FORMAT_R8G8B8_SSCALED,
223    ISL_FORMAT_R8G8B8A8_SSCALED
224 };
225 
226 static GLuint
double_types(int size,GLboolean doubles)227 double_types(int size, GLboolean doubles)
228 {
229    /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
230     * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
231     * 64-bit components are stored in the URB without any conversion."
232     * Also included on BDW PRM, Volume 7, page 470, table "Source Element
233     * Formats Supported in VF Unit"
234     *
235     * Previous PRMs don't include those references, so for gen7 we can't use
236     * PASSTHRU formats directly. But in any case, we prefer to return passthru
237     * even in that case, because that reflects what we want to achieve, even
238     * if we would need to workaround on gen < 8.
239     */
240    return (doubles
241            ? double_types_passthru[size]
242            : double_types_float[size]);
243 }
244 
245 /**
246  * Given vertex array type/size/format/normalized info, return
247  * the appopriate hardware surface type.
248  * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
249  */
250 unsigned
brw_get_vertex_surface_type(struct brw_context * brw,const struct gl_vertex_format * glformat)251 brw_get_vertex_surface_type(struct brw_context *brw,
252                             const struct gl_vertex_format *glformat)
253 {
254    int size = glformat->Size;
255    const struct gen_device_info *devinfo = &brw->screen->devinfo;
256    const bool is_ivybridge_or_older =
257       devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell;
258 
259    if (INTEL_DEBUG & DEBUG_VERTS)
260       fprintf(stderr, "type %s size %d normalized %d\n",
261               _mesa_enum_to_string(glformat->Type),
262               glformat->Size, glformat->Normalized);
263 
264    if (glformat->Integer) {
265       assert(glformat->Format == GL_RGBA); /* sanity check */
266       switch (glformat->Type) {
267       case GL_INT: return int_types_direct[size];
268       case GL_SHORT:
269          if (is_ivybridge_or_older && size == 3)
270             return short_types_direct[4];
271          else
272             return short_types_direct[size];
273       case GL_BYTE:
274          if (is_ivybridge_or_older && size == 3)
275             return byte_types_direct[4];
276          else
277             return byte_types_direct[size];
278       case GL_UNSIGNED_INT: return uint_types_direct[size];
279       case GL_UNSIGNED_SHORT:
280          if (is_ivybridge_or_older && size == 3)
281             return ushort_types_direct[4];
282          else
283             return ushort_types_direct[size];
284       case GL_UNSIGNED_BYTE:
285          if (is_ivybridge_or_older && size == 3)
286             return ubyte_types_direct[4];
287          else
288             return ubyte_types_direct[size];
289       default: unreachable("not reached");
290       }
291    } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
292       return ISL_FORMAT_R11G11B10_FLOAT;
293    } else if (glformat->Normalized) {
294       switch (glformat->Type) {
295       case GL_DOUBLE: return double_types(size, glformat->Doubles);
296       case GL_FLOAT: return float_types[size];
297       case GL_HALF_FLOAT:
298       case GL_HALF_FLOAT_OES:
299          if (devinfo->gen < 6 && size == 3)
300             return half_float_types[4];
301          else
302             return half_float_types[size];
303       case GL_INT: return int_types_norm[size];
304       case GL_SHORT: return short_types_norm[size];
305       case GL_BYTE: return byte_types_norm[size];
306       case GL_UNSIGNED_INT: return uint_types_norm[size];
307       case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
308       case GL_UNSIGNED_BYTE:
309          if (glformat->Format == GL_BGRA) {
310             /* See GL_EXT_vertex_array_bgra */
311             assert(size == 4);
312             return ISL_FORMAT_B8G8R8A8_UNORM;
313          }
314          else {
315             return ubyte_types_norm[size];
316          }
317       case GL_FIXED:
318          if (devinfo->gen >= 8 || devinfo->is_haswell)
319             return fixed_point_types[size];
320 
321          /* This produces GL_FIXED inputs as values between INT32_MIN and
322           * INT32_MAX, which will be scaled down by 1/65536 by the VS.
323           */
324          return int_types_scale[size];
325       /* See GL_ARB_vertex_type_2_10_10_10_rev.
326        * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
327        * like to use here, so upload everything as UINT and fix
328        * it in the shader
329        */
330       case GL_INT_2_10_10_10_REV:
331          assert(size == 4);
332          if (devinfo->gen >= 8 || devinfo->is_haswell) {
333             return glformat->Format == GL_BGRA
334                ? ISL_FORMAT_B10G10R10A2_SNORM
335                : ISL_FORMAT_R10G10B10A2_SNORM;
336          }
337          return ISL_FORMAT_R10G10B10A2_UINT;
338       case GL_UNSIGNED_INT_2_10_10_10_REV:
339          assert(size == 4);
340          if (devinfo->gen >= 8 || devinfo->is_haswell) {
341             return glformat->Format == GL_BGRA
342                ? ISL_FORMAT_B10G10R10A2_UNORM
343                : ISL_FORMAT_R10G10B10A2_UNORM;
344          }
345          return ISL_FORMAT_R10G10B10A2_UINT;
346       default: unreachable("not reached");
347       }
348    }
349    else {
350       /* See GL_ARB_vertex_type_2_10_10_10_rev.
351        * W/A: the hardware doesn't really support the formats we'd
352        * like to use here, so upload everything as UINT and fix
353        * it in the shader
354        */
355       if (glformat->Type == GL_INT_2_10_10_10_REV) {
356          assert(size == 4);
357          if (devinfo->gen >= 8 || devinfo->is_haswell) {
358             return glformat->Format == GL_BGRA
359                ? ISL_FORMAT_B10G10R10A2_SSCALED
360                : ISL_FORMAT_R10G10B10A2_SSCALED;
361          }
362          return ISL_FORMAT_R10G10B10A2_UINT;
363       } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
364          assert(size == 4);
365          if (devinfo->gen >= 8 || devinfo->is_haswell) {
366             return glformat->Format == GL_BGRA
367                ? ISL_FORMAT_B10G10R10A2_USCALED
368                : ISL_FORMAT_R10G10B10A2_USCALED;
369          }
370          return ISL_FORMAT_R10G10B10A2_UINT;
371       }
372       assert(glformat->Format == GL_RGBA); /* sanity check */
373       switch (glformat->Type) {
374       case GL_DOUBLE: return double_types(size, glformat->Doubles);
375       case GL_FLOAT: return float_types[size];
376       case GL_HALF_FLOAT:
377       case GL_HALF_FLOAT_OES:
378          if (devinfo->gen < 6 && size == 3)
379             return half_float_types[4];
380          else
381             return half_float_types[size];
382       case GL_INT: return int_types_scale[size];
383       case GL_SHORT: return short_types_scale[size];
384       case GL_BYTE: return byte_types_scale[size];
385       case GL_UNSIGNED_INT: return uint_types_scale[size];
386       case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
387       case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
388       case GL_FIXED:
389          if (devinfo->gen >= 8 || devinfo->is_haswell)
390             return fixed_point_types[size];
391 
392          /* This produces GL_FIXED inputs as values between INT32_MIN and
393           * INT32_MAX, which will be scaled down by 1/65536 by the VS.
394           */
395          return int_types_scale[size];
396       default: unreachable("not reached");
397       }
398    }
399 }
400 
401 static void
copy_array_to_vbo_array(struct brw_context * brw,const uint8_t * const ptr,const int src_stride,int min,int max,struct brw_vertex_buffer * buffer,GLuint dst_stride)402 copy_array_to_vbo_array(struct brw_context *brw,
403                         const uint8_t *const ptr, const int src_stride,
404 			int min, int max,
405 			struct brw_vertex_buffer *buffer,
406 			GLuint dst_stride)
407 {
408    const unsigned char *src = ptr + min * src_stride;
409    int count = max - min + 1;
410    GLuint size = count * dst_stride;
411    uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride,
412                                    &buffer->bo, &buffer->offset);
413 
414    /* The GL 4.5 spec says:
415     *      "If any enabled array’s buffer binding is zero when DrawArrays or
416     *      one of the other drawing commands defined in section 10.4 is called,
417     *      the result is undefined."
418     *
419     * In this case, let's the dst with undefined values
420     */
421    if (ptr != NULL) {
422       if (dst_stride == src_stride) {
423          memcpy(dst, src, size);
424       } else {
425          while (count--) {
426             memcpy(dst, src, dst_stride);
427             src += src_stride;
428             dst += dst_stride;
429          }
430       }
431    }
432    buffer->stride = dst_stride;
433    buffer->size = size;
434 }
435 
436 void
brw_prepare_vertices(struct brw_context * brw)437 brw_prepare_vertices(struct brw_context *brw)
438 {
439    const struct gen_device_info *devinfo = &brw->screen->devinfo;
440    struct gl_context *ctx = &brw->ctx;
441    /* BRW_NEW_VERTEX_PROGRAM */
442    const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
443    /* BRW_NEW_VS_PROG_DATA */
444    const struct brw_vs_prog_data *vs_prog_data =
445       brw_vs_prog_data(brw->vs.base.prog_data);
446    const uint64_t vs_inputs64 =
447       nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read,
448                                        vp->DualSlotInputs);
449    assert((vs_inputs64 & ~(uint64_t)VERT_BIT_ALL) == 0);
450    unsigned vs_inputs = (unsigned)vs_inputs64;
451    unsigned int min_index = brw->vb.min_index + brw->basevertex;
452    unsigned int max_index = brw->vb.max_index + brw->basevertex;
453    int delta, j;
454 
455    /* _NEW_POLYGON
456     *
457     * On gen6+, edge flags don't end up in the VUE (either in or out of the
458     * VS).  Instead, they're uploaded as the last vertex element, and the data
459     * is passed sideband through the fixed function units.  So, we need to
460     * prepare the vertex buffer for it, but it's not present in inputs_read.
461     */
462    if (devinfo->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
463                            ctx->Polygon.BackMode != GL_FILL)) {
464       vs_inputs |= VERT_BIT_EDGEFLAG;
465    }
466 
467    if (0)
468       fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);
469 
470    /* Accumulate the list of enabled arrays. */
471    brw->vb.nr_enabled = 0;
472 
473    unsigned mask = vs_inputs;
474    while (mask) {
475       const gl_vert_attrib attr = u_bit_scan(&mask);
476       struct brw_vertex_element *input = &brw->vb.inputs[attr];
477       brw->vb.enabled[brw->vb.nr_enabled++] = input;
478    }
479    assert(brw->vb.nr_enabled <= VERT_ATTRIB_MAX);
480 
481    if (brw->vb.nr_enabled == 0)
482       return;
483 
484    if (brw->vb.nr_buffers)
485       return;
486 
487    j = 0;
488    const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
489 
490    unsigned vbomask = vs_inputs & _mesa_draw_vbo_array_bits(ctx);
491    while (vbomask) {
492       const struct gl_vertex_buffer_binding *const glbinding =
493          _mesa_draw_buffer_binding(vao, ffs(vbomask) - 1);
494       const GLsizei stride = glbinding->Stride;
495 
496       assert(glbinding->BufferObj);
497 
498       /* Accumulate the range of a single vertex, start with inverted range */
499       uint32_t vertex_range_start = ~(uint32_t)0;
500       uint32_t vertex_range_end = 0;
501 
502       const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
503       unsigned attrmask = vbomask & boundmask;
504       /* Mark the those attributes as processed */
505       vbomask ^= attrmask;
506       /* We can assume that we have an array for the binding */
507       assert(attrmask);
508       /* Walk attributes belonging to the binding */
509       while (attrmask) {
510          const gl_vert_attrib attr = u_bit_scan(&attrmask);
511          const struct gl_array_attributes *const glattrib =
512             _mesa_draw_array_attrib(vao, attr);
513          const uint32_t rel_offset =
514             _mesa_draw_attributes_relative_offset(glattrib);
515          const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
516 
517          vertex_range_start = MIN2(vertex_range_start, rel_offset);
518          vertex_range_end = MAX2(vertex_range_end, rel_end);
519 
520          struct brw_vertex_element *input = &brw->vb.inputs[attr];
521          input->glformat = &glattrib->Format;
522          input->buffer = j;
523          input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
524          input->offset = rel_offset;
525       }
526       assert(vertex_range_start <= vertex_range_end);
527 
528       struct intel_buffer_object *intel_buffer =
529          intel_buffer_object(glbinding->BufferObj);
530       struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
531 
532       const uint32_t offset = _mesa_draw_binding_offset(glbinding);
533 
534       /* If nothing else is known take the buffer size and offset as a bound */
535       uint32_t start = vertex_range_start;
536       uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start;
537       /* Check if we can get a more narrow range */
538       if (glbinding->InstanceDivisor) {
539          if (brw->num_instances) {
540             const uint32_t vertex_size = vertex_range_end - vertex_range_start;
541             start = vertex_range_start + stride * brw->baseinstance;
542             range = (stride * ((brw->num_instances - 1) /
543                                glbinding->InstanceDivisor) +
544                      vertex_size);
545          }
546       } else {
547          if (brw->vb.index_bounds_valid) {
548             const uint32_t vertex_size = vertex_range_end - vertex_range_start;
549             start = vertex_range_start + stride * min_index;
550             range = (stride * (max_index - min_index) +
551                      vertex_size);
552 
553             /**
554              * Unreal Engine 4 has a bug in usage of glDrawRangeElements,
555              * causing it to be called with a number of vertices in place
556              * of "end" parameter (which specifies the maximum array index
557              * contained in indices).
558              *
559              * Since there is unknown amount of games affected and we
560              * could not identify that a game is built with UE4 - we are
561              * forced to make a blanket workaround, disregarding max_index
562              * in range calculations. Fortunately all such calls look like:
563              *   glDrawRangeElements(GL_TRIANGLES, 0, 3, 3, ...);
564              * So we are able to narrow down this workaround.
565              *
566              * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2917
567              */
568             if (unlikely(max_index == 3 && min_index == 0 &&
569                          brw->draw.derived_params.is_indexed_draw)) {
570                   range = intel_buffer->Base.Size - offset - start;
571             }
572          }
573       }
574 
575       buffer->offset = offset;
576       buffer->size = start + range;
577       buffer->stride = stride;
578       buffer->step_rate = glbinding->InstanceDivisor;
579 
580       buffer->bo = intel_bufferobj_buffer(brw, intel_buffer, offset + start,
581                                           range, false);
582       brw_bo_reference(buffer->bo);
583 
584       j++;
585    }
586 
587    /* If we need to upload all the arrays, then we can trim those arrays to
588     * only the used elements [min_index, max_index] so long as we adjust all
589     * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
590     */
591    brw->vb.start_vertex_bias = 0;
592    delta = min_index;
593    if ((vs_inputs & _mesa_draw_vbo_array_bits(ctx)) == 0) {
594       brw->vb.start_vertex_bias = -delta;
595       delta = 0;
596    }
597 
598    unsigned usermask = vs_inputs & _mesa_draw_user_array_bits(ctx);
599    while (usermask) {
600       const struct gl_vertex_buffer_binding *const glbinding =
601          _mesa_draw_buffer_binding(vao, ffs(usermask) - 1);
602       const GLsizei stride = glbinding->Stride;
603 
604       assert(!glbinding->BufferObj);
605       assert(brw->vb.index_bounds_valid);
606 
607       /* Accumulate the range of a single vertex, start with inverted range */
608       uint32_t vertex_range_start = ~(uint32_t)0;
609       uint32_t vertex_range_end = 0;
610 
611       const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
612       unsigned attrmask = usermask & boundmask;
613       /* Mark the those attributes as processed */
614       usermask ^= attrmask;
615       /* We can assume that we have an array for the binding */
616       assert(attrmask);
617       /* Walk attributes belonging to the binding */
618       while (attrmask) {
619          const gl_vert_attrib attr = u_bit_scan(&attrmask);
620          const struct gl_array_attributes *const glattrib =
621             _mesa_draw_array_attrib(vao, attr);
622          const uint32_t rel_offset =
623             _mesa_draw_attributes_relative_offset(glattrib);
624          const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
625 
626          vertex_range_start = MIN2(vertex_range_start, rel_offset);
627          vertex_range_end = MAX2(vertex_range_end, rel_end);
628 
629          struct brw_vertex_element *input = &brw->vb.inputs[attr];
630          input->glformat = &glattrib->Format;
631          input->buffer = j;
632          input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
633          input->offset = rel_offset;
634       }
635       assert(vertex_range_start <= vertex_range_end);
636 
637       struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
638 
639       const uint8_t *ptr = (const uint8_t*)_mesa_draw_binding_offset(glbinding);
640       ptr += vertex_range_start;
641       const uint32_t vertex_size = vertex_range_end - vertex_range_start;
642       if (glbinding->Stride == 0) {
643          /* If the source stride is zero, we just want to upload the current
644           * attribute once and set the buffer's stride to 0.  There's no need
645           * to replicate it out.
646           */
647          copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size);
648       } else if (glbinding->InstanceDivisor == 0) {
649          copy_array_to_vbo_array(brw, ptr, stride, min_index,
650                                  max_index, buffer, vertex_size);
651       } else {
652          /* This is an instanced attribute, since its InstanceDivisor
653           * is not zero. Therefore, its data will be stepped after the
654           * instanced draw has been run InstanceDivisor times.
655           */
656          uint32_t instanced_attr_max_index =
657             (brw->num_instances - 1) / glbinding->InstanceDivisor;
658          copy_array_to_vbo_array(brw, ptr, stride, 0,
659                                  instanced_attr_max_index, buffer, vertex_size);
660       }
661       buffer->offset -= delta * buffer->stride + vertex_range_start;
662       buffer->size += delta * buffer->stride + vertex_range_start;
663       buffer->step_rate = glbinding->InstanceDivisor;
664 
665       j++;
666    }
667 
668    /* Upload the current values */
669    unsigned curmask = vs_inputs & _mesa_draw_current_bits(ctx);
670    if (curmask) {
671       /* For each attribute, upload the maximum possible size. */
672       uint8_t data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
673       uint8_t *cursor = data;
674 
675       do {
676          const gl_vert_attrib attr = u_bit_scan(&curmask);
677          const struct gl_array_attributes *const glattrib =
678             _mesa_draw_current_attrib(ctx, attr);
679          const unsigned size = glattrib->Format._ElementSize;
680          const unsigned alignment = align(size, sizeof(GLdouble));
681          memcpy(cursor, glattrib->Ptr, size);
682          if (alignment != size)
683             memset(cursor + size, 0, alignment - size);
684 
685          struct brw_vertex_element *input = &brw->vb.inputs[attr];
686          input->glformat = &glattrib->Format;
687          input->buffer = j;
688          input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
689          input->offset = cursor - data;
690 
691          cursor += alignment;
692       } while (curmask);
693 
694       struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
695       const unsigned size = cursor - data;
696       brw_upload_data(&brw->upload, data, size, size,
697                       &buffer->bo, &buffer->offset);
698       buffer->stride = 0;
699       buffer->size = size;
700       buffer->step_rate = 0;
701 
702       j++;
703    }
704    brw->vb.nr_buffers = j;
705 }
706 
707 void
brw_prepare_shader_draw_parameters(struct brw_context * brw)708 brw_prepare_shader_draw_parameters(struct brw_context *brw)
709 {
710    const struct brw_vs_prog_data *vs_prog_data =
711       brw_vs_prog_data(brw->vs.base.prog_data);
712 
713    /* For non-indirect draws, upload the shader draw parameters */
714    if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
715        brw->draw.draw_params_bo == NULL) {
716       brw_upload_data(&brw->upload,
717                       &brw->draw.params, sizeof(brw->draw.params), 4,
718                       &brw->draw.draw_params_bo,
719                       &brw->draw.draw_params_offset);
720    }
721 
722    if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
723       brw_upload_data(&brw->upload,
724                       &brw->draw.derived_params, sizeof(brw->draw.derived_params), 4,
725                       &brw->draw.derived_draw_params_bo,
726                       &brw->draw.derived_draw_params_offset);
727    }
728 }
729 
730 static void
brw_upload_indices(struct brw_context * brw)731 brw_upload_indices(struct brw_context *brw)
732 {
733    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
734    GLuint ib_size;
735    struct brw_bo *old_bo = brw->ib.bo;
736    struct gl_buffer_object *bufferobj;
737    GLuint offset;
738    GLuint ib_type_size;
739 
740    if (index_buffer == NULL)
741       return;
742 
743    ib_type_size = 1 << index_buffer->index_size_shift;
744    ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
745                                    index_buffer->obj->Size;
746    bufferobj = index_buffer->obj;
747 
748    /* Turn into a proper VBO:
749     */
750    if (!bufferobj) {
751       /* Get new bufferobj, offset:
752        */
753       brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
754                       &brw->ib.bo, &offset);
755       brw->ib.size = brw->ib.bo->size;
756    } else {
757       offset = (GLuint) (unsigned long) index_buffer->ptr;
758 
759       struct brw_bo *bo =
760          intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
761                                 offset, ib_size, false);
762       if (bo != brw->ib.bo) {
763          brw_bo_unreference(brw->ib.bo);
764          brw->ib.bo = bo;
765          brw->ib.size = bufferobj->Size;
766          brw_bo_reference(bo);
767       }
768    }
769 
770    /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
771     * the index buffer state when we're just moving the start index
772     * of our drawing.
773     */
774    brw->ib.start_vertex_offset = offset / ib_type_size;
775 
776    if (brw->ib.bo != old_bo)
777       brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
778 
779    unsigned index_size = 1 << index_buffer->index_size_shift;
780    if (index_size != brw->ib.index_size) {
781       brw->ib.index_size = index_size;
782       brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
783    }
784 
785    /* We need to re-emit an index buffer state each time
786     * when cut index flag is changed
787     */
788    if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
789       brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
790       brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
791    }
792 }
793 
794 const struct brw_tracked_state brw_indices = {
795    .dirty = {
796       .mesa = 0,
797       .brw = BRW_NEW_BLORP |
798              BRW_NEW_INDICES,
799    },
800    .emit = brw_upload_indices,
801 };
802