1 /*
2  * Copyright 2010 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
24 
25 #include "pipe/p_context.h"
26 #include "pipe/p_state.h"
27 #include "util/u_inlines.h"
28 #include "util/u_format.h"
29 #include "translate/translate.h"
30 
31 #include "nvc0/nvc0_context.h"
32 #include "nvc0/nvc0_query_hw.h"
33 #include "nvc0/nvc0_resource.h"
34 
35 #include "nvc0/nvc0_3d.xml.h"
36 
37 void
nvc0_vertex_state_delete(struct pipe_context * pipe,void * hwcso)38 nvc0_vertex_state_delete(struct pipe_context *pipe,
39                          void *hwcso)
40 {
41    struct nvc0_vertex_stateobj *so = hwcso;
42 
43    if (so->translate)
44       so->translate->release(so->translate);
45    FREE(hwcso);
46 }
47 
48 void *
nvc0_vertex_state_create(struct pipe_context * pipe,unsigned num_elements,const struct pipe_vertex_element * elements)49 nvc0_vertex_state_create(struct pipe_context *pipe,
50                          unsigned num_elements,
51                          const struct pipe_vertex_element *elements)
52 {
53     struct nvc0_vertex_stateobj *so;
54     struct translate_key transkey;
55     unsigned i;
56     unsigned src_offset_max = 0;
57 
58     so = MALLOC(sizeof(*so) +
59                 num_elements * sizeof(struct nvc0_vertex_element));
60     if (!so)
61         return NULL;
62     so->num_elements = num_elements;
63     so->instance_elts = 0;
64     so->instance_bufs = 0;
65     so->shared_slots = false;
66     so->need_conversion = false;
67 
68     memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
69 
70     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
71        so->min_instance_div[i] = 0xffffffff;
72 
73     transkey.nr_elements = 0;
74     transkey.output_stride = 0;
75 
76     for (i = 0; i < num_elements; ++i) {
77         const struct pipe_vertex_element *ve = &elements[i];
78         const unsigned vbi = ve->vertex_buffer_index;
79         unsigned size;
80         enum pipe_format fmt = ve->src_format;
81 
82         so->element[i].pipe = elements[i];
83         so->element[i].state = nvc0_vertex_format[fmt].vtx;
84 
85         if (!so->element[i].state) {
86             switch (util_format_get_nr_components(fmt)) {
87             case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
88             case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
89             case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
90             case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
91             default:
92                 assert(0);
93                 FREE(so);
94                 return NULL;
95             }
96             so->element[i].state = nvc0_vertex_format[fmt].vtx;
97             so->need_conversion = true;
98             pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
99                                "Converting vertex element %d, no hw format %s",
100                                i, util_format_name(ve->src_format));
101         }
102         size = util_format_get_blocksize(fmt);
103 
104         src_offset_max = MAX2(src_offset_max, ve->src_offset);
105 
106         if (so->vb_access_size[vbi] < (ve->src_offset + size))
107            so->vb_access_size[vbi] = ve->src_offset + size;
108 
109         if (unlikely(ve->instance_divisor)) {
110            so->instance_elts |= 1 << i;
111            so->instance_bufs |= 1 << vbi;
112            if (ve->instance_divisor < so->min_instance_div[vbi])
113               so->min_instance_div[vbi] = ve->instance_divisor;
114         }
115 
116         if (1) {
117             unsigned ca;
118             unsigned j = transkey.nr_elements++;
119 
120             ca = util_format_description(fmt)->channel[0].size / 8;
121             if (ca != 1 && ca != 2)
122                ca = 4;
123 
124             transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
125             transkey.element[j].input_format = ve->src_format;
126             transkey.element[j].input_buffer = vbi;
127             transkey.element[j].input_offset = ve->src_offset;
128             transkey.element[j].instance_divisor = ve->instance_divisor;
129 
130             transkey.output_stride = align(transkey.output_stride, ca);
131             transkey.element[j].output_format = fmt;
132             transkey.element[j].output_offset = transkey.output_stride;
133             transkey.output_stride += size;
134 
135             so->element[i].state_alt = so->element[i].state;
136             so->element[i].state_alt |= transkey.element[j].output_offset << 7;
137         }
138 
139         so->element[i].state |= i << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
140     }
141     transkey.output_stride = align(transkey.output_stride, 4);
142 
143     so->size = transkey.output_stride;
144     so->translate = translate_create(&transkey);
145 
146     if (so->instance_elts || src_offset_max >= (1 << 14))
147        return so;
148     so->shared_slots = true;
149 
150     for (i = 0; i < num_elements; ++i) {
151        const unsigned b = elements[i].vertex_buffer_index;
152        const unsigned s = elements[i].src_offset;
153        so->element[i].state &= ~NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK;
154        so->element[i].state |= b << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
155        so->element[i].state |= s << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT;
156     }
157     return so;
158 }
159 
/* Attribute format word written for vertex attribute slots that are not in
 * use. The expansion is parenthesized so it behaves as a single operand no
 * matter which operators surround it at the point of use.
 */
#define NVC0_3D_VERTEX_ATTRIB_INACTIVE                                       \
   (NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |                                \
    NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST)

/* Compose a VTX_ATTR_DEFINE word: `a` goes into the ATTR field, `c` into the
 * COMP field, and `t` / `s` are pasted onto the TYPE_ / SIZE_ constant names.
 */
#define VTX_ATTR(a, c, t, s)                            \
   ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) |                \
    (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) |                \
    ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) |      \
    ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
169 
170 static void
nvc0_set_constant_vertex_attrib(struct nvc0_context * nvc0,const unsigned a)171 nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a)
172 {
173    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
174    struct pipe_vertex_element *ve = &nvc0->vertex->element[a].pipe;
175    struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
176    uint32_t mode;
177    const struct util_format_description *desc;
178    void *dst;
179    const void *src = (const uint8_t *)vb->buffer.user + ve->src_offset;
180    assert(vb->is_user_buffer);
181 
182    desc = util_format_description(ve->src_format);
183 
184    PUSH_SPACE(push, 6);
185    BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5);
186    dst = &push->cur[1];
187    if (desc->channel[0].pure_integer) {
188       if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
189          mode = VTX_ATTR(a, 4, SINT, 32);
190          desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1);
191       } else {
192          mode = VTX_ATTR(a, 4, UINT, 32);
193          desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1);
194       }
195    } else {
196       mode = VTX_ATTR(a, 4, FLOAT, 32);
197       desc->unpack_rgba_float(dst, 0, src, 0, 1, 1);
198    }
199    push->cur[0] = mode;
200    push->cur += 5;
201 }
202 
203 static inline void
nvc0_user_vbuf_range(struct nvc0_context * nvc0,int vbi,uint32_t * base,uint32_t * size)204 nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
205                      uint32_t *base, uint32_t *size)
206 {
207    if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) {
208       const uint32_t div = nvc0->vertex->min_instance_div[vbi];
209       *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride;
210       *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride +
211          nvc0->vertex->vb_access_size[vbi];
212    } else {
213       /* NOTE: if there are user buffers, we *must* have index bounds */
214       assert(nvc0->vb_elt_limit != ~0);
215       *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride;
216       *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride +
217          nvc0->vertex->vb_access_size[vbi];
218    }
219 }
220 
221 static inline void
nvc0_release_user_vbufs(struct nvc0_context * nvc0)222 nvc0_release_user_vbufs(struct nvc0_context *nvc0)
223 {
224    if (nvc0->vbo_user) {
225       nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
226       nouveau_scratch_done(&nvc0->base);
227    }
228 }
229 
230 static void
nvc0_update_user_vbufs(struct nvc0_context * nvc0)231 nvc0_update_user_vbufs(struct nvc0_context *nvc0)
232 {
233    uint64_t address[PIPE_MAX_ATTRIBS];
234    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
235    int i;
236    uint32_t written = 0;
237 
238    PUSH_SPACE(push, nvc0->vertex->num_elements * 8);
239    for (i = 0; i < nvc0->vertex->num_elements; ++i) {
240       struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
241       const unsigned b = ve->vertex_buffer_index;
242       struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
243       uint32_t base, size;
244 
245       if (!(nvc0->vbo_user & (1 << b)))
246          continue;
247       if (nvc0->constant_vbos & (1 << b)) {
248          nvc0_set_constant_vertex_attrib(nvc0, i);
249          continue;
250       }
251       nvc0_user_vbuf_range(nvc0, b, &base, &size);
252 
253       if (!(written & (1 << b))) {
254          struct nouveau_bo *bo;
255          const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
256          written |= 1 << b;
257          address[b] = nouveau_scratch_data(&nvc0->base, vb->buffer.user,
258                                            base, size, &bo);
259          if (bo)
260             BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo);
261 
262          NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
263       }
264 
265       BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
266       PUSH_DATA (push, i);
267       PUSH_DATAh(push, address[b] + base + size - 1);
268       PUSH_DATA (push, address[b] + base + size - 1);
269       PUSH_DATAh(push, address[b] + ve->src_offset);
270       PUSH_DATA (push, address[b] + ve->src_offset);
271    }
272    nvc0->base.vbo_dirty = true;
273 }
274 
275 static void
nvc0_update_user_vbufs_shared(struct nvc0_context * nvc0)276 nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0)
277 {
278    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
279    uint32_t mask = nvc0->vbo_user & ~nvc0->constant_vbos;
280 
281    PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
282    while (mask) {
283       struct nouveau_bo *bo;
284       const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
285       uint64_t address;
286       uint32_t base, size;
287       const int b = ffs(mask) - 1;
288       mask &= ~(1 << b);
289 
290       nvc0_user_vbuf_range(nvc0, b, &base, &size);
291 
292       address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].buffer.user,
293                                      base, size, &bo);
294       if (bo)
295          BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo);
296 
297       BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
298       PUSH_DATA (push, b);
299       PUSH_DATAh(push, address + base + size - 1);
300       PUSH_DATA (push, address + base + size - 1);
301       PUSH_DATAh(push, address);
302       PUSH_DATA (push, address);
303 
304       NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
305    }
306 
307    mask = nvc0->state.constant_elts;
308    while (mask) {
309       int i = ffs(mask) - 1;
310       mask &= ~(1 << i);
311       nvc0_set_constant_vertex_attrib(nvc0, i);
312    }
313 }
314 
315 static void
nvc0_validate_vertex_buffers(struct nvc0_context * nvc0)316 nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
317 {
318    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
319    const struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
320    uint32_t refd = 0;
321    unsigned i;
322 
323    PUSH_SPACE(push, vertex->num_elements * 8);
324    for (i = 0; i < vertex->num_elements; ++i) {
325       const struct nvc0_vertex_element *ve;
326       const struct pipe_vertex_buffer *vb;
327       struct nv04_resource *res;
328       unsigned b;
329       unsigned limit, offset;
330 
331       if (nvc0->state.constant_elts & (1 << i))
332          continue;
333       ve = &vertex->element[i];
334       b = ve->pipe.vertex_buffer_index;
335       vb = &nvc0->vtxbuf[b];
336 
337       if (nvc0->vbo_user & (1 << b)) {
338          if (!(nvc0->constant_vbos & (1 << b))) {
339             if (ve->pipe.instance_divisor) {
340                BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
341                PUSH_DATA (push, ve->pipe.instance_divisor);
342             }
343             BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
344             PUSH_DATA (push, (1 << 12) | vb->stride);
345          }
346          /* address/value set in nvc0_update_user_vbufs */
347          continue;
348       }
349       res = nv04_resource(vb->buffer.resource);
350       offset = ve->pipe.src_offset + vb->buffer_offset;
351       limit = vb->buffer.resource->width0 - 1;
352 
353       if (unlikely(ve->pipe.instance_divisor)) {
354          BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
355          PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
356          PUSH_DATAh(push, res->address + offset);
357          PUSH_DATA (push, res->address + offset);
358          PUSH_DATA (push, ve->pipe.instance_divisor);
359       } else {
360          BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
361          PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
362          PUSH_DATAh(push, res->address + offset);
363          PUSH_DATA (push, res->address + offset);
364       }
365       BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
366       PUSH_DATAh(push, res->address + limit);
367       PUSH_DATA (push, res->address + limit);
368 
369       if (!(refd & (1 << b))) {
370          refd |= 1 << b;
371          BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, res, RD);
372       }
373    }
374    if (nvc0->vbo_user)
375       nvc0_update_user_vbufs(nvc0);
376 }
377 
378 static void
nvc0_validate_vertex_buffers_shared(struct nvc0_context * nvc0)379 nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
380 {
381    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
382    unsigned b;
383    const uint32_t mask = nvc0->vbo_user;
384 
385    PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements);
386    for (b = 0; b < nvc0->num_vtxbufs; ++b) {
387       struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
388       struct nv04_resource *buf;
389       uint32_t offset, limit;
390 
391       if (mask & (1 << b)) {
392          if (!(nvc0->constant_vbos & (1 << b))) {
393             BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1);
394             PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
395          }
396          /* address/value set in nvc0_update_user_vbufs_shared */
397          continue;
398       } else if (!vb->buffer.resource) {
399          /* there can be holes in the vertex buffer lists */
400          IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
401          continue;
402       }
403       buf = nv04_resource(vb->buffer.resource);
404       offset = vb->buffer_offset;
405       limit = buf->base.width0 - 1;
406 
407       BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3);
408       PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
409       PUSH_DATAh(push, buf->address + offset);
410       PUSH_DATA (push, buf->address + offset);
411       BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
412       PUSH_DATAh(push, buf->address + limit);
413       PUSH_DATA (push, buf->address + limit);
414 
415       BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, buf, RD);
416    }
417    /* If there are more elements than buffers, we might not have unset
418     * fetching on the later elements.
419     */
420    for (; b < nvc0->vertex->num_elements; ++b)
421       IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
422 
423    if (nvc0->vbo_user)
424       nvc0_update_user_vbufs_shared(nvc0);
425 }
426 
427 void
nvc0_vertex_arrays_validate(struct nvc0_context * nvc0)428 nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
429 {
430    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
431    struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
432    struct nvc0_vertex_element *ve;
433    uint32_t const_vbos;
434    unsigned i;
435    uint8_t vbo_mode;
436    bool update_vertex;
437 
438    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
439 
440    assert(vertex);
441    if (unlikely(vertex->need_conversion) ||
442        unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
443       vbo_mode = 3;
444    } else if (nvc0->vbo_user & ~nvc0->constant_vbos) {
445       vbo_mode = nvc0->vbo_push_hint ? 1 : 0;
446    } else {
447       vbo_mode = 0;
448    }
449    const_vbos = vbo_mode ? 0 : nvc0->constant_vbos;
450 
451    update_vertex = (nvc0->dirty_3d & NVC0_NEW_3D_VERTEX) ||
452       (const_vbos != nvc0->state.constant_vbos) ||
453       (vbo_mode != nvc0->state.vbo_mode);
454 
455    if (update_vertex) {
456       const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts);
457 
458       nvc0->state.constant_vbos = const_vbos;
459       nvc0->state.constant_elts = 0;
460       nvc0->state.num_vtxelts = vertex->num_elements;
461       nvc0->state.vbo_mode = vbo_mode;
462 
463       if (unlikely(vbo_mode)) {
464          if (unlikely(nvc0->state.instance_elts & 3)) {
465             /* translate mode uses only 2 vertex buffers */
466             nvc0->state.instance_elts &= ~3;
467             PUSH_SPACE(push, 3);
468             BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(0)), 2);
469             PUSH_DATA (push, 0);
470             PUSH_DATA (push, 0);
471          }
472 
473          PUSH_SPACE(push, n * 2 + 4);
474 
475          BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
476          for (i = 0; i < vertex->num_elements; ++i)
477             PUSH_DATA(push, vertex->element[i].state_alt);
478          for (; i < n; ++i)
479             PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
480 
481          BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 1);
482          PUSH_DATA (push, (1 << 12) | vertex->size);
483          for (i = 1; i < n; ++i)
484             IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
485       } else {
486          uint32_t *restrict data;
487 
488          if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
489             nvc0->state.instance_elts = vertex->instance_elts;
490             assert(n); /* if (n == 0), both masks should be 0 */
491             PUSH_SPACE(push, 3);
492             BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
493             PUSH_DATA (push, n);
494             PUSH_DATA (push, vertex->instance_elts);
495          }
496 
497          PUSH_SPACE(push, n * 2 + 1);
498          BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
499          data = push->cur;
500          push->cur += n;
501          for (i = 0; i < vertex->num_elements; ++i) {
502             ve = &vertex->element[i];
503             data[i] = ve->state;
504             if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
505                nvc0->state.constant_elts |= 1 << i;
506                data[i] |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
507                IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
508             }
509          }
510          for (; i < n; ++i) {
511             data[i] = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
512             IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
513          }
514       }
515    }
516    if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */
517       return;
518 
519    if (vertex->shared_slots)
520       nvc0_validate_vertex_buffers_shared(nvc0);
521    else
522       nvc0_validate_vertex_buffers(nvc0);
523 }
524 
525 #define NVC0_PRIM_GL_CASE(n) \
526    case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
527 
528 static inline unsigned
nvc0_prim_gl(unsigned prim)529 nvc0_prim_gl(unsigned prim)
530 {
531    switch (prim) {
532    NVC0_PRIM_GL_CASE(POINTS);
533    NVC0_PRIM_GL_CASE(LINES);
534    NVC0_PRIM_GL_CASE(LINE_LOOP);
535    NVC0_PRIM_GL_CASE(LINE_STRIP);
536    NVC0_PRIM_GL_CASE(TRIANGLES);
537    NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
538    NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
539    NVC0_PRIM_GL_CASE(QUADS);
540    NVC0_PRIM_GL_CASE(QUAD_STRIP);
541    NVC0_PRIM_GL_CASE(POLYGON);
542    NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
543    NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
544    NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
545    NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
546    NVC0_PRIM_GL_CASE(PATCHES);
547    default:
548       return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
549    }
550 }
551 
552 static void
nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf * push)553 nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push)
554 {
555    struct nvc0_screen *screen = push->user_priv;
556 
557    nouveau_fence_update(&screen->base, true);
558 
559    NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
560 }
561 
562 static void
nvc0_draw_arrays(struct nvc0_context * nvc0,unsigned mode,unsigned start,unsigned count,unsigned instance_count)563 nvc0_draw_arrays(struct nvc0_context *nvc0,
564                  unsigned mode, unsigned start, unsigned count,
565                  unsigned instance_count)
566 {
567    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
568    unsigned prim;
569 
570    if (nvc0->state.index_bias) {
571       /* index_bias is implied 0 if !info->index_size (really ?) */
572       /* TODO: can we deactivate it for the VERTEX_BUFFER_FIRST command ? */
573       PUSH_SPACE(push, 2);
574       IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
575       IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0);
576       nvc0->state.index_bias = 0;
577    }
578 
579    prim = nvc0_prim_gl(mode);
580 
581    while (instance_count--) {
582       PUSH_SPACE(push, 6);
583       BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
584       PUSH_DATA (push, prim);
585       BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
586       PUSH_DATA (push, start);
587       PUSH_DATA (push, count);
588       IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
589 
590       prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
591    }
592    NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_array, 1);
593 }
594 
595 static void
nvc0_draw_elements_inline_u08(struct nouveau_pushbuf * push,const uint8_t * map,unsigned start,unsigned count)596 nvc0_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
597                               unsigned start, unsigned count)
598 {
599    map += start;
600 
601    if (count & 3) {
602       unsigned i;
603       PUSH_SPACE(push, 4);
604       BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), count & 3);
605       for (i = 0; i < (count & 3); ++i)
606          PUSH_DATA(push, *map++);
607       count &= ~3;
608    }
609    while (count) {
610       unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
611 
612       PUSH_SPACE(push, nr + 1);
613       BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U8), nr);
614       for (i = 0; i < nr; ++i) {
615          PUSH_DATA(push,
616                   (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
617          map += 4;
618       }
619       count -= nr * 4;
620    }
621 }
622 
623 static void
nvc0_draw_elements_inline_u16(struct nouveau_pushbuf * push,const uint16_t * map,unsigned start,unsigned count)624 nvc0_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,
625                               unsigned start, unsigned count)
626 {
627    map += start;
628 
629    if (count & 1) {
630       count &= ~1;
631       PUSH_SPACE(push, 2);
632       BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
633       PUSH_DATA (push, *map++);
634    }
635    while (count) {
636       unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
637 
638       PUSH_SPACE(push, nr + 1);
639       BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
640       for (i = 0; i < nr; ++i) {
641          PUSH_DATA(push, (map[1] << 16) | map[0]);
642          map += 2;
643       }
644       count -= nr * 2;
645    }
646 }
647 
648 static void
nvc0_draw_elements_inline_u32(struct nouveau_pushbuf * push,const uint32_t * map,unsigned start,unsigned count)649 nvc0_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
650                               unsigned start, unsigned count)
651 {
652    map += start;
653 
654    while (count) {
655       const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
656 
657       PUSH_SPACE(push, nr + 1);
658       BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), nr);
659       PUSH_DATAp(push, map, nr);
660 
661       map += nr;
662       count -= nr;
663    }
664 }
665 
666 static void
nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf * push,const uint32_t * map,unsigned start,unsigned count)667 nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
668                                     const uint32_t *map,
669                                     unsigned start, unsigned count)
670 {
671    map += start;
672 
673    if (count & 1) {
674       count--;
675       PUSH_SPACE(push, 2);
676       BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
677       PUSH_DATA (push, *map++);
678    }
679    while (count) {
680       unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
681 
682       PUSH_SPACE(push, nr + 1);
683       BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
684       for (i = 0; i < nr; ++i) {
685          PUSH_DATA(push, (map[1] << 16) | map[0]);
686          map += 2;
687       }
688       count -= nr * 2;
689    }
690 }
691 
692 static void
nvc0_draw_elements(struct nvc0_context * nvc0,bool shorten,const struct pipe_draw_info * info,unsigned mode,unsigned start,unsigned count,unsigned instance_count,int32_t index_bias,unsigned index_size)693 nvc0_draw_elements(struct nvc0_context *nvc0, bool shorten,
694                    const struct pipe_draw_info *info,
695                    unsigned mode, unsigned start, unsigned count,
696                    unsigned instance_count, int32_t index_bias,
697 		   unsigned index_size)
698 {
699    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
700    unsigned prim;
701 
702    prim = nvc0_prim_gl(mode);
703 
704    if (index_bias != nvc0->state.index_bias) {
705       PUSH_SPACE(push, 4);
706       BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 1);
707       PUSH_DATA (push, index_bias);
708       BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 1);
709       PUSH_DATA (push, index_bias);
710       nvc0->state.index_bias = index_bias;
711    }
712 
713    if (!info->has_user_indices) {
714       PUSH_SPACE(push, 1);
715       IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), prim);
716       do {
717          PUSH_SPACE(push, 7);
718          BEGIN_NVC0(push, NVC0_3D(INDEX_BATCH_FIRST), 2);
719          PUSH_DATA (push, start);
720          PUSH_DATA (push, count);
721          if (--instance_count) {
722             BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 2);
723             PUSH_DATA (push, 0);
724             PUSH_DATA (push, prim | NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT);
725          }
726       } while (instance_count);
727       IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
728    } else {
729       const void *data = info->index.user;
730 
731       while (instance_count--) {
732          PUSH_SPACE(push, 2);
733          BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
734          PUSH_DATA (push, prim);
735          switch (index_size) {
736          case 1:
737             nvc0_draw_elements_inline_u08(push, data, start, count);
738             break;
739          case 2:
740             nvc0_draw_elements_inline_u16(push, data, start, count);
741             break;
742          case 4:
743             if (shorten)
744                nvc0_draw_elements_inline_u32_short(push, data, start, count);
745             else
746                nvc0_draw_elements_inline_u32(push, data, start, count);
747             break;
748          default:
749             assert(0);
750             return;
751          }
752          PUSH_SPACE(push, 1);
753          IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
754 
755          prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
756       }
757    }
758    NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_indexed, 1);
759 }
760 
761 static void
nvc0_draw_stream_output(struct nvc0_context * nvc0,const struct pipe_draw_info * info)762 nvc0_draw_stream_output(struct nvc0_context *nvc0,
763                         const struct pipe_draw_info *info)
764 {
765    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
766    struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output);
767    struct nv04_resource *res = nv04_resource(so->pipe.buffer);
768    unsigned mode = nvc0_prim_gl(info->mode);
769    unsigned num_instances = info->instance_count;
770 
771    if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
772       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
773       PUSH_SPACE(push, 2);
774       IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
775       nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
776       if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
777          IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
778 
779       NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, 1);
780    }
781 
782    while (num_instances--) {
783       nouveau_pushbuf_space(push, 16, 0, 1);
784       BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
785       PUSH_DATA (push, mode);
786       BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
787       PUSH_DATA (push, 0);
788       BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_STRIDE), 1);
789       PUSH_DATA (push, so->stride);
790       BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BYTES), 1);
791       nvc0_hw_query_pushbuf_submit(push, nvc0_query(so->pq), 0x4);
792       IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
793 
794       mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
795    }
796 }
797 
798 static void
nvc0_draw_indirect(struct nvc0_context * nvc0,const struct pipe_draw_info * info)799 nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
800 {
801    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
802    struct nv04_resource *buf = nv04_resource(info->indirect->buffer);
803    struct nv04_resource *buf_count = nv04_resource(info->indirect->indirect_draw_count);
804    unsigned size, macro, count = info->indirect->draw_count, drawid = info->drawid;
805    uint32_t offset = buf->offset + info->indirect->offset;
806    struct nvc0_screen *screen = nvc0->screen;
807 
808    PUSH_SPACE(push, 7);
809 
810    /* must make FIFO wait for engines idle before continuing to process */
811    if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
812        (buf_count && buf_count->fence_wr &&
813         !nouveau_fence_signalled(buf_count->fence_wr))) {
814       IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
815    }
816 
817    /* Queue things up to let the macros write params to the driver constbuf */
818    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
819    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
820    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
821    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
822    BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
823    PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
824 
825    if (info->index_size) {
826       assert(!info->has_user_indices);
827       assert(nouveau_resource_mapped_by_gpu(info->index.resource));
828       size = 5;
829       if (buf_count)
830          macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT;
831       else
832          macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT;
833    } else {
834       if (nvc0->state.index_bias) {
835          /* index_bias is implied 0 if !info->index_size (really ?) */
836          IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
837          IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0);
838          nvc0->state.index_bias = 0;
839       }
840       size = 4;
841       if (buf_count)
842          macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT;
843       else
844          macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
845    }
846 
847    /* If the stride is not the natural stride, we have to stick a separate
848     * push data reference for each draw. Otherwise it can all go in as one.
849     * Of course there is a maximum packet size, so we have to break things up
850     * along those borders as well.
851     */
852    while (count) {
853       unsigned draws = count, pushes, i;
854       if (info->indirect->stride == size * 4) {
855          draws = MIN2(draws, (NV04_PFIFO_MAX_PACKET_LEN - 4) / size);
856          pushes = 1;
857       } else {
858          draws = MIN2(draws, 32);
859          pushes = draws;
860       }
861 
862       nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count);
863       PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
864       if (buf_count)
865          PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain);
866       PUSH_DATA(push,
867                 NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size));
868       PUSH_DATA(push, nvc0_prim_gl(info->mode));
869       PUSH_DATA(push, drawid);
870       PUSH_DATA(push, draws);
871       if (buf_count) {
872          nouveau_pushbuf_data(push,
873                               buf_count->bo,
874                               buf_count->offset + info->indirect->indirect_draw_count_offset,
875                               NVC0_IB_ENTRY_1_NO_PREFETCH | 4);
876       }
877       if (pushes == 1) {
878          nouveau_pushbuf_data(push,
879                               buf->bo, offset,
880                               NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4 * draws));
881          offset += draws * info->indirect->stride;
882       } else {
883          for (i = 0; i < pushes; i++) {
884             nouveau_pushbuf_data(push,
885                                  buf->bo, offset,
886                                  NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4));
887             offset += info->indirect->stride;
888          }
889       }
890       count -= draws;
891       drawid += draws;
892    }
893 }
894 
895 static inline void
nvc0_update_prim_restart(struct nvc0_context * nvc0,bool en,uint32_t index)896 nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
897 {
898    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
899 
900    if (en != nvc0->state.prim_restart) {
901       if (en) {
902          BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
903          PUSH_DATA (push, 1);
904          PUSH_DATA (push, index);
905       } else {
906          IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
907       }
908       nvc0->state.prim_restart = en;
909    } else
910    if (en) {
911       BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
912       PUSH_DATA (push, index);
913    }
914 }
915 
916 void
nvc0_draw_vbo(struct pipe_context * pipe,const struct pipe_draw_info * info)917 nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
918 {
919    struct nvc0_context *nvc0 = nvc0_context(pipe);
920    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
921    struct nvc0_screen *screen = nvc0->screen;
922    int s;
923 
924    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
925    nvc0->vb_elt_first = info->min_index + info->index_bias;
926    nvc0->vb_elt_limit = info->max_index - info->min_index;
927    nvc0->instance_off = info->start_instance;
928    nvc0->instance_max = info->instance_count - 1;
929 
930    /* For picking only a few vertices from a large user buffer, push is better,
931     * if index count is larger and we expect repeated vertices, suggest upload.
932     */
933    nvc0->vbo_push_hint =
934       !info->indirect && info->index_size &&
935       (nvc0->vb_elt_limit >= (info->count * 2));
936 
937    /* Check whether we want to switch vertex-submission mode. */
938    if (nvc0->vbo_user && !(nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))) {
939       if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode)
940          if (nvc0->state.vbo_mode != 3)
941             nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
942 
943       if (!(nvc0->dirty_3d & NVC0_NEW_3D_ARRAYS) && nvc0->state.vbo_mode == 0) {
944          if (nvc0->vertex->shared_slots)
945             nvc0_update_user_vbufs_shared(nvc0);
946          else
947             nvc0_update_user_vbufs(nvc0);
948       }
949    }
950 
951    if (info->mode == PIPE_PRIM_PATCHES &&
952        nvc0->state.patch_vertices != info->vertices_per_patch) {
953       nvc0->state.patch_vertices = info->vertices_per_patch;
954       PUSH_SPACE(push, 1);
955       IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
956    }
957 
958    if (info->index_size && !info->has_user_indices) {
959       struct nv04_resource *buf = nv04_resource(info->index.resource);
960 
961       assert(buf);
962       assert(nouveau_resource_mapped_by_gpu(&buf->base));
963 
964       PUSH_SPACE(push, 6);
965       BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
966       PUSH_DATAh(push, buf->address);
967       PUSH_DATA (push, buf->address);
968       PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
969       PUSH_DATA (push, buf->address + buf->base.width0 - 1);
970       PUSH_DATA (push, info->index_size >> 1);
971 
972       BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
973    }
974 
975    list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) {
976       nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf,
977                         resident->flags);
978    }
979 
980    list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
981       nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf,
982                         resident->flags);
983    }
984 
985    nvc0_state_validate_3d(nvc0, ~0);
986 
987    if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) {
988       PUSH_SPACE(push, 9);
989       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
990       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
991       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
992       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
993       BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
994       PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
995       PUSH_DATA (push, info->index_bias);
996       PUSH_DATA (push, info->start_instance);
997       PUSH_DATA (push, info->drawid);
998    }
999 
1000    if (nvc0->screen->base.class_3d < NVE4_3D_CLASS &&
1001        nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) {
1002       nvc0->state.seamless_cube_map = nvc0->seamless_cube_map;
1003       PUSH_SPACE(push, 1);
1004       IMMED_NVC0(push, NVC0_3D(TEX_MISC),
1005                  nvc0->seamless_cube_map ? NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
1006    }
1007 
1008    push->kick_notify = nvc0_draw_vbo_kick_notify;
1009 
1010    for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
1011       if (nvc0->constbuf_coherent[s])
1012          nvc0->cb_dirty = true;
1013    }
1014 
1015    if (nvc0->cb_dirty) {
1016       PUSH_SPACE(push, 1);
1017       IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
1018       nvc0->cb_dirty = false;
1019    }
1020 
1021    for (s = 0; s < 5; ++s) {
1022       if (!nvc0->textures_coherent[s])
1023          continue;
1024 
1025       PUSH_SPACE(push, nvc0->num_textures[s] * 2);
1026 
1027       for (int i = 0; i < nvc0->num_textures[s]; ++i) {
1028          struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
1029          if (!(nvc0->textures_coherent[s] & (1 << i)))
1030             continue;
1031 
1032          BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
1033          PUSH_DATA (push, (tic->id << 4) | 1);
1034          NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
1035       }
1036    }
1037 
1038    if (nvc0->state.vbo_mode) {
1039       nvc0_push_vbo(nvc0, info);
1040       goto cleanup;
1041    }
1042 
1043    /* space for base instance, flush, and prim restart */
1044    PUSH_SPACE(push, 8);
1045 
1046    if (nvc0->state.instance_base != info->start_instance) {
1047       nvc0->state.instance_base = info->start_instance;
1048       /* NOTE: this does not affect the shader input, should it ? */
1049       BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1);
1050       PUSH_DATA (push, info->start_instance);
1051    }
1052 
1053    nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent;
1054 
1055    if (!nvc0->base.vbo_dirty && info->index_size && !info->has_user_indices &&
1056        info->index.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
1057       nvc0->base.vbo_dirty = true;
1058 
1059    nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
1060 
1061    if (nvc0->base.vbo_dirty) {
1062       if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
1063          IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
1064       nvc0->base.vbo_dirty = false;
1065    }
1066 
1067    if (unlikely(info->indirect)) {
1068       nvc0_draw_indirect(nvc0, info);
1069    } else
1070    if (unlikely(info->count_from_stream_output)) {
1071       nvc0_draw_stream_output(nvc0, info);
1072    } else
1073    if (info->index_size) {
1074       bool shorten = info->max_index <= 65535;
1075 
1076       if (info->primitive_restart && info->restart_index > 65535)
1077          shorten = false;
1078 
1079       nvc0_draw_elements(nvc0, shorten, info,
1080                          info->mode, info->start, info->count,
1081                          info->instance_count, info->index_bias, info->index_size);
1082    } else {
1083       nvc0_draw_arrays(nvc0,
1084                        info->mode, info->start, info->count,
1085                        info->instance_count);
1086    }
1087 
1088 cleanup:
1089    push->kick_notify = nvc0_default_kick_notify;
1090 
1091    nvc0_release_user_vbufs(nvc0);
1092 
1093    nouveau_pushbuf_bufctx(push, NULL);
1094 
1095    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
1096    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS);
1097 }
1098