/*
 * Copyright 2008 Ben Skeggs
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "nv50_context.h"

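/* Upload dirty constant buffers for the vertex, geometry and fragment
 * stages. User buffers are only supported in slot 0 and are copied inline
 * through CB_DATA; buffer resources are bound via CB_DEF_ADDRESS_HIGH and
 * SET_PROGRAM_CB instead.
 */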
void
nv50_constbufs_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned s;

   for (s = 0; s < 3; ++s) {
      unsigned p;

      if (s == PIPE_SHADER_FRAGMENT)
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
      else
      if (s == PIPE_SHADER_GEOMETRY)
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
      else
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;

      while (nv50->constbuf_dirty[s]) {
         const int i = ffs(nv50->constbuf_dirty[s]) - 1;
         nv50->constbuf_dirty[s] &= ~(1 << i);

         if (nv50->constbuf[s][i].user) {
            const unsigned b = NV50_CB_PVP + s;
            unsigned start = 0;
            unsigned words = nv50->constbuf[s][0].size / 4;
            if (i) {
               NOUVEAU_ERR("user constbufs only supported in slot 0\n");
               continue;
            }
            if (!nv50->state.uniform_buffer_bound[s]) {
               nv50->state.uniform_buffer_bound[s] = TRUE;
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
            }
            while (words) {
               unsigned nr;

               if (!PUSH_SPACE(push, 16))
                  break;
               nr = PUSH_AVAIL(push);
               assert(nr >= 16);
               nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);

               BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
               PUSH_DATA (push, (start << 8) | b);
               BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
               PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);

               start += nr;
               words -= nr;
            }
         } else {
            struct nv04_resource *res =
               nv04_resource(nv50->constbuf[s][i].u.buf);
            if (res) {
               /* TODO: allocate persistent bindings */
               const unsigned b = s * 16 + i;

               assert(nouveau_resource_mapped_by_gpu(&res->base));

               BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
               PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
               PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
               PUSH_DATA (push, (b << 16) |
                          (nv50->constbuf[s][i].size & 0xffff));
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);

               BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
            } else {
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (i << 8) | p | 0);
            }
            if (i == 0)
               nv50->state.uniform_buffer_bound[s] = FALSE;
         }
      }
   }
}

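/* Translate the program if that has not happened yet, then make sure its
 * code has been uploaded to the code segment.
 */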
static boolean
nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
   if (!prog->translated) {
      prog->translated = nv50_program_translate(
         prog, nv50->screen->base.device->chipset);
      if (!prog->translated)
         return FALSE;
   } else
   if (prog->mem)
      return TRUE;

   return nv50_program_upload_code(nv50, prog);
}

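/* Track which shader stages require thread-local storage and keep the TLS
 * buffer referenced in the 3D buffer context while any stage still needs it.
 */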
static INLINE void
nv50_program_update_context_state(struct nv50_context *nv50,
                                  struct nv50_program *prog, int stage)
{
   const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;

   if (prog && prog->tls_space) {
      if (nv50->state.new_tls_space)
         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
      if (!nv50->state.tls_required || nv50->state.new_tls_space)
         BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
      nv50->state.new_tls_space = FALSE;
      nv50->state.tls_required |= 1 << stage;
   } else {
      if (nv50->state.tls_required == (1 << stage))
         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
      nv50->state.tls_required &= ~(1 << stage);
   }
}

void
nv50_vertprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->vertprog;

   if (!nv50_program_validate(nv50, vp))
      return;
   nv50_program_update_context_state(nv50, vp, 0);

   BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2);
   PUSH_DATA (push, vp->vp.attrs[0]);
   PUSH_DATA (push, vp->vp.attrs[1]);
   BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1);
   PUSH_DATA (push, vp->max_out);
   BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, vp->max_gpr);
   BEGIN_NV04(push, NV50_3D(VP_START_ID), 1);
   PUSH_DATA (push, vp->code_base);
}

void
nv50_fragprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *fp = nv50->fragprog;

   if (!nv50_program_validate(nv50, fp))
      return;
   nv50_program_update_context_state(nv50, fp, 1);

   BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, fp->max_gpr);
   BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
   PUSH_DATA (push, fp->max_out);
   BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
   PUSH_DATA (push, fp->fp.flags[0]);
   BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
   PUSH_DATA (push, fp->fp.flags[1]);
   BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
   PUSH_DATA (push, fp->code_base);
}

void
nv50_gmtyprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *gp = nv50->gmtyprog;

   if (gp) {
      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
      PUSH_DATA (push, gp->max_gpr);
      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
      PUSH_DATA (push, gp->max_out);
      BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
      PUSH_DATA (push, gp->gp.prim_type);
      BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);
      PUSH_DATA (push, gp->gp.vert_count);
      BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
      PUSH_DATA (push, gp->code_base);

      nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
   }
   nv50_program_update_context_state(nv50, gp, 2);

   /* GP_ENABLE is updated in linkage validation */
}

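/* Program the point coord replacement map from the fragment program's
 * generic inputs and the rasterizer's sprite_coord_enable mask; clear the
 * map again when point_quad_rasterization is disabled.
 */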
static void
nv50_sprite_coords_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   uint32_t pntc[8], mode;
   struct nv50_program *fp = nv50->fragprog;
   unsigned i, c;
   unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;

   if (!nv50->rast->pipe.point_quad_rasterization) {
      if (nv50->state.point_sprite) {
         BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
         for (i = 0; i < 8; ++i)
            PUSH_DATA(push, 0);

         nv50->state.point_sprite = FALSE;
      }
      return;
   } else {
      nv50->state.point_sprite = TRUE;
   }

   memset(pntc, 0, sizeof(pntc));

   for (i = 0; i < fp->in_nr; i++) {
      unsigned n = util_bitcount(fp->in[i].mask);

      if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
         m += n;
         continue;
      }
      if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
         m += n;
         continue;
      }

      for (c = 0; c < 4; ++c) {
         if (fp->in[i].mask & (1 << c)) {
            pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
            ++m;
         }
      }
   }

   if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
      mode = 0x00;
   else
      mode = 0x10;

   BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
   PUSH_DATA (push, mode);

   BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
   PUSH_DATAp(push, pntc, 8);
}

/* Validate state derived from shaders and the rasterizer cso. */
void
nv50_validate_derived_rs(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   uint32_t color, psize;

   nv50_sprite_coords_validate(nv50);

   if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
      nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
      BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
      PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
   }

   if (nv50->dirty & NV50_NEW_FRAGPROG)
      return;
   psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
   color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;

   if (nv50->rast->pipe.clamp_vertex_color)
      color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;

   if (color != nv50->state.semantic_color) {
      nv50->state.semantic_color = color;
      BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);
      PUSH_DATA (push, color);
   }

   if (nv50->rast->pipe.point_size_per_vertex)
      psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;

   if (psize != nv50->state.semantic_psize) {
      nv50->state.semantic_psize = psize;
      BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);
      PUSH_DATA (push, psize);
   }
}

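/* Fill the result map for one vec4 varying: map the enabled components of
 * the FP input 'in' to the hardware output slots of 'out', record linear
 * interpolation in 'lin', and return the updated map index.
 */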
static int
nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
              struct nv50_varying *in, struct nv50_varying *out)
{
   int c;
   uint8_t mv = out->mask, mf = in->mask, oid = out->hw;

   for (c = 0; c < 4; ++c) {
      if (mf & 1) {
         if (in->linear)
            lin[mid / 32] |= 1 << (mid % 32);
         if (mv & 1)
            map[mid] = oid;
         else
         if (c == 3)
            map[mid] |= 1;
         ++mid;
      }

      oid += mv & 1;
      mf >>= 1;
      mv >>= 1;
   }

   return mid;
}

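/* Build the VP/GP result map that feeds the fragment program: position,
 * clip distances, colors (with two-sided lighting), generic varyings,
 * PrimitiveID and point size, then upload it along with the semantic and
 * interpolation control words and, if present, the stream output map.
 */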
void
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
   struct nv50_program *fp = nv50->fragprog;
   struct nv50_varying dummy;
   int i, n, c, m;
   uint32_t primid = 0;
   uint32_t psiz = 0x000;
   uint32_t interp = fp->fp.interp;
   uint32_t colors = fp->fp.colors;
   uint32_t lin[4];
   uint8_t map[64];
   uint8_t so_map[64];

   if (!(nv50->dirty & (NV50_NEW_VERTPROG |
                        NV50_NEW_FRAGPROG |
                        NV50_NEW_GMTYPROG))) {
      uint8_t bfc, ffc;
      ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
      bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
         >> 8;
      if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
         return;
   }

   memset(lin, 0x00, sizeof(lin));

   /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
    *  or is it the first byte ?
    */
   memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));

   dummy.mask = 0xf; /* map all components of HPOS */
   dummy.linear = 0;
   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);

   for (c = 0; c < vp->vp.clpd_nr; ++c)
      map[m++] = vp->vp.clpd[c / 4] + (c % 4);

   colors |= m << 8; /* adjust BFC0 id */

   dummy.mask = 0x0;

   /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
   if (nv50->rast->pipe.light_twoside) {
      for (i = 0; i < 2; ++i) {
         n = vp->vp.bfc[i];
         if (fp->vp.bfc[i] >= fp->in_nr)
            continue;
         m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
                           (n < vp->out_nr) ? &vp->out[n] : &dummy);
      }
   }
   colors += m - 4; /* adjust FFC0 id */
   interp |= m << 8; /* set map id where 'normal' FP inputs start */

   for (i = 0; i < fp->in_nr; ++i) {
      for (n = 0; n < vp->out_nr; ++n)
         if (vp->out[n].sn == fp->in[i].sn &&
             vp->out[n].si == fp->in[i].si)
            break;
      m = nv50_vec4_map(map, m, lin,
                        &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
   }

   /* PrimitiveID either is replaced by the system value, or
    * written by the geometry shader into an output register
    */
   if (fp->gp.primid < 0x80) {
      primid = m;
      map[m++] = vp->gp.primid;
   }

   if (nv50->rast->pipe.point_size_per_vertex) {
      psiz = (m << 4) | 1;
      map[m++] = vp->vp.psiz;
   }

   if (nv50->rast->pipe.clamp_vertex_color)
      colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;

   if (unlikely(vp->so)) {
      /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
       * gets written.
       *
       * TODO:
       * Inverting vp->so->map (output -> offset) would probably speed this up.
       */
      memset(so_map, 0, sizeof(so_map));
      for (i = 0; i < vp->so->map_size; ++i) {
         if (vp->so->map[i] == 0xff)
            continue;
         for (c = 0; c < m; ++c)
            if (map[c] == vp->so->map[i] && !so_map[c])
               break;
         if (c == m) {
            c = m;
            map[m++] = vp->so->map[i];
         }
         so_map[c] = 0x80 | i;
      }
      for (c = m; c & 3; ++c)
         so_map[c] = 0;
   }

   n = (m + 3) / 4;
   assert(m <= 64);

   if (unlikely(nv50->gmtyprog)) {
      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
      PUSH_DATA (push, m);
      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
      PUSH_DATAp(push, map, n);
   } else {
      BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
      PUSH_DATA (push, vp->vp.attrs[2]);

      BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
      PUSH_DATA (push, primid);

      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
      PUSH_DATA (push, m);
      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
      PUSH_DATAp(push, map, n);
   }

   BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4);
   PUSH_DATA (push, colors);
   PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, psiz);

   BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
   PUSH_DATA (push, interp);

   nv50->state.interpolant_ctrl = interp;

   nv50->state.semantic_color = colors;
   nv50->state.semantic_psize = psiz;

   BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
   PUSH_DATAp(push, lin, 4);

   BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
   PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);

   if (vp->so) {
      BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
      PUSH_DATAp(push, so_map, n);
   }
}

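/* Map geometry program inputs to the vertex program's output registers;
 * components the VP does not write fall back to 0x40 (0x41 for the w
 * component).
 */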
static int
nv50_vp_gp_mapping(uint8_t *map, int m,
                   struct nv50_program *vp, struct nv50_program *gp)
{
   int i, j, c;

   for (i = 0; i < gp->in_nr; ++i) {
      uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;

      for (j = 0; j < vp->out_nr; ++j) {
         if (vp->out[j].sn == gp->in[i].sn &&
             vp->out[j].si == gp->in[i].si) {
            mv = vp->out[j].mask;
            oid = vp->out[j].hw;
            break;
         }
      }

      for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
         if (mg & mv & 1)
            map[m++] = oid;
         else
         if (mg & 1)
            map[m++] = (c == 3) ? 0x41 : 0x40;
         oid += mv & 1;
      }
   }
   return m;
}

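/* Upload the VP result map used to feed the geometry program. */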
void
nv50_gp_linkage_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->vertprog;
   struct nv50_program *gp = nv50->gmtyprog;
   int m = 0;
   int n;
   uint8_t map[64];

   if (!gp)
      return;
   memset(map, 0, sizeof(map));

   m = nv50_vp_gp_mapping(map, m, vp, gp);

   n = (m + 3) / 4;

   BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
   PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);

   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
   PUSH_DATA (push, m);
   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
   PUSH_DATAp(push, map, n);
}

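/* Set up transform feedback: disable it first, then bind the stream output
 * targets and re-enable. On pre-NVA0 classes the primitive limit is derived
 * from the smallest bound buffer; on NVA0+ the limit mode is byte offsets
 * and the saved offset is restored from the target's query object.
 */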
void
nv50_stream_output_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_stream_output_state *so;
   uint32_t ctrl;
   unsigned i;
   unsigned prims = ~0;

   so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;

   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
   PUSH_DATA (push, 0);
   if (!so || !nv50->num_so_targets) {
      if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
         BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
         PUSH_DATA (push, 0);
      }
      BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
      PUSH_DATA (push, 1);
      return;
   }

   /* previous TFB needs to complete */
   if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   ctrl = so->ctrl;
   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
      ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;

   BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
   PUSH_DATA (push, ctrl);

   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);

   for (i = 0; i < nv50->num_so_targets; ++i) {
      struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);

      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;

      if (n == 4 && !targ->clean)
         nv84_query_fifo_wait(push, targ->pq);
      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, so->num_attribs[i]);
      if (n == 4) {
         PUSH_DATA(push, targ->pipe.buffer_size);

         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
         if (!targ->clean) {
            assert(targ->pq);
            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
         } else {
            PUSH_DATA(push, 0);
            targ->clean = FALSE;
         }
      } else {
         const unsigned limit = targ->pipe.buffer_size /
            (so->stride[i] * nv50->state.prim_size);
         prims = MIN2(prims, limit);
      }
      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
   }
   if (prims != ~0) {
      BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
      PUSH_DATA (push, prims);
   }
   BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
   PUSH_DATA (push, 1);
}