/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "util/u_format.h"
25 #include "util/u_half.h"
26 #include "vc5_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/compiler/v3d_compiler.h"
30 
31 static uint8_t
vc5_factor(enum pipe_blendfactor factor,bool dst_alpha_one)32 vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
33 {
34         /* We may get a bad blendfactor when blending is disabled. */
35         if (factor == 0)
36                 return V3D_BLEND_FACTOR_ZERO;
37 
38         switch (factor) {
39         case PIPE_BLENDFACTOR_ZERO:
40                 return V3D_BLEND_FACTOR_ZERO;
41         case PIPE_BLENDFACTOR_ONE:
42                 return V3D_BLEND_FACTOR_ONE;
43         case PIPE_BLENDFACTOR_SRC_COLOR:
44                 return V3D_BLEND_FACTOR_SRC_COLOR;
45         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
46                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
47         case PIPE_BLENDFACTOR_DST_COLOR:
48                 return V3D_BLEND_FACTOR_DST_COLOR;
49         case PIPE_BLENDFACTOR_INV_DST_COLOR:
50                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
51         case PIPE_BLENDFACTOR_SRC_ALPHA:
52                 return V3D_BLEND_FACTOR_SRC_ALPHA;
53         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
54                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
55         case PIPE_BLENDFACTOR_DST_ALPHA:
56                 return (dst_alpha_one ?
57                         V3D_BLEND_FACTOR_ONE :
58                         V3D_BLEND_FACTOR_DST_ALPHA);
59         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
60                 return (dst_alpha_one ?
61                         V3D_BLEND_FACTOR_ZERO :
62                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
63         case PIPE_BLENDFACTOR_CONST_COLOR:
64                 return V3D_BLEND_FACTOR_CONST_COLOR;
65         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
66                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
67         case PIPE_BLENDFACTOR_CONST_ALPHA:
68                 return V3D_BLEND_FACTOR_CONST_ALPHA;
69         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
70                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
71         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
72                 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
73         default:
74                 unreachable("Bad blend factor");
75         }
76 }
77 
78 static inline uint16_t
swizzled_border_color(const struct v3d_device_info * devinfo,struct pipe_sampler_state * sampler,struct vc5_sampler_view * sview,int chan)79 swizzled_border_color(const struct v3d_device_info *devinfo,
80                       struct pipe_sampler_state *sampler,
81                       struct vc5_sampler_view *sview,
82                       int chan)
83 {
84         const struct util_format_description *desc =
85                 util_format_description(sview->base.format);
86         uint8_t swiz = chan;
87 
88         /* If we're doing swizzling in the sampler, then only rearrange the
89          * border color for the mismatch between the VC5 texture format and
90          * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
91          * the sampler's swizzle.
92          *
93          * For swizzling in the shader, we don't do any pre-swizzling of the
94          * border color.
95          */
96         if (vc5_get_tex_return_size(devinfo, sview->base.format,
97                                     sampler->compare_mode) != 32)
98                 swiz = desc->swizzle[swiz];
99 
100         switch (swiz) {
101         case PIPE_SWIZZLE_0:
102                 return util_float_to_half(0.0);
103         case PIPE_SWIZZLE_1:
104                 return util_float_to_half(1.0);
105         default:
106                 return util_float_to_half(sampler->border_color.f[swiz]);
107         }
108 }
109 
110 #if V3D_VERSION < 40
111 static uint32_t
translate_swizzle(unsigned char pipe_swizzle)112 translate_swizzle(unsigned char pipe_swizzle)
113 {
114         switch (pipe_swizzle) {
115         case PIPE_SWIZZLE_0:
116                 return 0;
117         case PIPE_SWIZZLE_1:
118                 return 1;
119         case PIPE_SWIZZLE_X:
120         case PIPE_SWIZZLE_Y:
121         case PIPE_SWIZZLE_Z:
122         case PIPE_SWIZZLE_W:
123                 return 2 + pipe_swizzle;
124         default:
125                 unreachable("unknown swizzle");
126         }
127 }
128 
129 static void
emit_one_texture(struct vc5_context * vc5,struct vc5_texture_stateobj * stage_tex,int i)130 emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
131                  int i)
132 {
133         struct vc5_job *job = vc5->job;
134         struct pipe_sampler_state *psampler = stage_tex->samplers[i];
135         struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
136         struct pipe_sampler_view *psview = stage_tex->textures[i];
137         struct vc5_sampler_view *sview = vc5_sampler_view(psview);
138         struct pipe_resource *prsc = psview->texture;
139         struct vc5_resource *rsc = vc5_resource(prsc);
140         const struct v3d_device_info *devinfo = &vc5->screen->devinfo;
141 
142         stage_tex->texture_state[i].offset =
143                 vc5_cl_ensure_space(&job->indirect,
144                                     cl_packet_length(TEXTURE_SHADER_STATE),
145                                     32);
146         vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
147                              job->indirect.bo);
148 
149         uint32_t return_size = vc5_get_tex_return_size(devinfo, psview->format,
150                                                        psampler->compare_mode);
151 
152         struct V3D33_TEXTURE_SHADER_STATE unpacked = {
153                 /* XXX */
154                 .border_color_red = swizzled_border_color(devinfo, psampler,
155                                                           sview, 0),
156                 .border_color_green = swizzled_border_color(devinfo, psampler,
157                                                             sview, 1),
158                 .border_color_blue = swizzled_border_color(devinfo, psampler,
159                                                            sview, 2),
160                 .border_color_alpha = swizzled_border_color(devinfo, psampler,
161                                                             sview, 3),
162 
163                 /* In the normal texturing path, the LOD gets clamped between
164                  * min/max, and the base_level field (set in the sampler view
165                  * from first_level) only decides where the min/mag switch
166                  * happens, so we need to use the LOD clamps to keep us
167                  * between min and max.
168                  *
169                  * For txf, the LOD clamp is still used, despite GL not
170                  * wanting that.  We will need to have a separate
171                  * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
172                  * support txf properly.
173                  */
174                 .min_level_of_detail = MIN2(psview->u.tex.first_level +
175                                             MAX2(psampler->min_lod, 0),
176                                             psview->u.tex.last_level),
177                 .max_level_of_detail = MIN2(psview->u.tex.first_level +
178                                             psampler->max_lod,
179                                             psview->u.tex.last_level),
180 
181                 .texture_base_pointer = cl_address(rsc->bo,
182                                                    rsc->slices[0].offset),
183 
184                 .output_32_bit = return_size == 32,
185         };
186 
187         /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
188          * 32-bit, we leave swizzling up to the shader compiler.
189          *
190          * Note: Contrary to the docs, the swizzle still applies even if the
191          * return size is 32.  It's just that you probably want to swizzle in
192          * the shader, because you need the Y/Z/W channels to be defined.
193          */
194         if (return_size == 32) {
195                 unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
196                 unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
197                 unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
198                 unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
199         } else {
200                 unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);
201                 unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);
202                 unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);
203                 unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);
204         }
205 
206         int min_img_filter = psampler->min_img_filter;
207         int min_mip_filter = psampler->min_mip_filter;
208         int mag_img_filter = psampler->mag_img_filter;
209 
210         if (return_size == 32) {
211                 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
212                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
213                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
214         }
215 
216         bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
217         switch (min_mip_filter) {
218         case PIPE_TEX_MIPFILTER_NONE:
219                 unpacked.filter += min_nearest ? 2 : 0;
220                 break;
221         case PIPE_TEX_MIPFILTER_NEAREST:
222                 unpacked.filter += min_nearest ? 4 : 8;
223                 break;
224         case PIPE_TEX_MIPFILTER_LINEAR:
225                 unpacked.filter += min_nearest ? 4 : 8;
226                 unpacked.filter += 2;
227                 break;
228         }
229 
230         if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
231                 unpacked.filter++;
232 
233         if (psampler->max_anisotropy > 8)
234                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
235         else if (psampler->max_anisotropy > 4)
236                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
237         else if (psampler->max_anisotropy > 2)
238                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
239         else if (psampler->max_anisotropy)
240                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
241 
242         uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
243         cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
244 
245         for (int i = 0; i < ARRAY_SIZE(packed); i++)
246                 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
247 
248         /* TMU indirect structs need to be 32b aligned. */
249         vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
250         cl_emit_prepacked(&job->indirect, &packed);
251 }
252 
253 static void
emit_textures(struct vc5_context * vc5,struct vc5_texture_stateobj * stage_tex)254 emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
255 {
256         for (int i = 0; i < stage_tex->num_textures; i++) {
257                 if (stage_tex->textures[i])
258                         emit_one_texture(vc5, stage_tex, i);
259         }
260 }
261 #endif /* V3D_VERSION < 40 */
262 
263 static uint32_t
translate_colormask(struct vc5_context * vc5,uint32_t colormask,int rt)264 translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt)
265 {
266         if (vc5->swap_color_rb & (1 << rt)) {
267                 colormask = ((colormask & (2 | 8)) |
268                              ((colormask & 1) << 2) |
269                              ((colormask & 4) >> 2));
270         }
271 
272         return (~colormask) & 0xf;
273 }
274 
275 static void
emit_rt_blend(struct vc5_context * vc5,struct vc5_job * job,struct pipe_blend_state * blend,int rt)276 emit_rt_blend(struct vc5_context *vc5, struct vc5_job *job,
277               struct pipe_blend_state *blend, int rt)
278 {
279         cl_emit(&job->bcl, BLEND_CONFIG, config) {
280                 struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
281 
282 #if V3D_VERSION >= 40
283                 config.render_target_mask = 1 << rt;
284 #else
285                 assert(rt == 0);
286 #endif
287 
288                 config.colour_blend_mode = rtblend->rgb_func;
289                 config.colour_blend_dst_factor =
290                         vc5_factor(rtblend->rgb_dst_factor,
291                                    vc5->blend_dst_alpha_one);
292                 config.colour_blend_src_factor =
293                         vc5_factor(rtblend->rgb_src_factor,
294                                    vc5->blend_dst_alpha_one);
295 
296                 config.alpha_blend_mode = rtblend->alpha_func;
297                 config.alpha_blend_dst_factor =
298                         vc5_factor(rtblend->alpha_dst_factor,
299                                    vc5->blend_dst_alpha_one);
300                 config.alpha_blend_src_factor =
301                         vc5_factor(rtblend->alpha_src_factor,
302                                    vc5->blend_dst_alpha_one);
303         }
304 }
305 
306 void
v3dX(emit_state)307 v3dX(emit_state)(struct pipe_context *pctx)
308 {
309         struct vc5_context *vc5 = vc5_context(pctx);
310         struct vc5_job *job = vc5->job;
311 
312         if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
313                           VC5_DIRTY_RASTERIZER)) {
314                 float *vpscale = vc5->viewport.scale;
315                 float *vptranslate = vc5->viewport.translate;
316                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
317                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
318                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
319                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
320 
321                 /* Clip to the scissor if it's enabled, but still clip to the
322                  * drawable regardless since that controls where the binner
323                  * tries to put things.
324                  *
325                  * Additionally, always clip the rendering to the viewport,
326                  * since the hardware does guardband clipping, meaning
327                  * primitives would rasterize outside of the view volume.
328                  */
329                 uint32_t minx, miny, maxx, maxy;
330                 if (!vc5->rasterizer->base.scissor) {
331                         minx = MAX2(vp_minx, 0);
332                         miny = MAX2(vp_miny, 0);
333                         maxx = MIN2(vp_maxx, job->draw_width);
334                         maxy = MIN2(vp_maxy, job->draw_height);
335                 } else {
336                         minx = MAX2(vp_minx, vc5->scissor.minx);
337                         miny = MAX2(vp_miny, vc5->scissor.miny);
338                         maxx = MIN2(vp_maxx, vc5->scissor.maxx);
339                         maxy = MIN2(vp_maxy, vc5->scissor.maxy);
340                 }
341 
342                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
343                         clip.clip_window_left_pixel_coordinate = minx;
344                         clip.clip_window_bottom_pixel_coordinate = miny;
345                         clip.clip_window_width_in_pixels = maxx - minx;
346                         clip.clip_window_height_in_pixels = maxy - miny;
347                 }
348 
349                 job->draw_min_x = MIN2(job->draw_min_x, minx);
350                 job->draw_min_y = MIN2(job->draw_min_y, miny);
351                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
352                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
353         }
354 
355         if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
356                           VC5_DIRTY_ZSA |
357                           VC5_DIRTY_BLEND |
358                           VC5_DIRTY_COMPILED_FS)) {
359                 cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
360                         config.enable_forward_facing_primitive =
361                                 !vc5->rasterizer->base.rasterizer_discard &&
362                                 !(vc5->rasterizer->base.cull_face &
363                                   PIPE_FACE_FRONT);
364                         config.enable_reverse_facing_primitive =
365                                 !vc5->rasterizer->base.rasterizer_discard &&
366                                 !(vc5->rasterizer->base.cull_face &
367                                   PIPE_FACE_BACK);
368                         /* This seems backwards, but it's what gets the
369                          * clipflat test to pass.
370                          */
371                         config.clockwise_primitives =
372                                 vc5->rasterizer->base.front_ccw;
373 
374                         config.enable_depth_offset =
375                                 vc5->rasterizer->base.offset_tri;
376 
377                         config.rasterizer_oversample_mode =
378                                 vc5->rasterizer->base.multisample;
379 
380                         config.direct3d_provoking_vertex =
381                                 vc5->rasterizer->base.flatshade_first;
382 
383                         config.blend_enable = vc5->blend->rt[0].blend_enable;
384 
385                         config.early_z_updates_enable = true;
386                         if (vc5->zsa->base.depth.enabled) {
387                                 config.z_updates_enable =
388                                         vc5->zsa->base.depth.writemask;
389                                 config.early_z_enable =
390                                         (vc5->zsa->early_z_enable &&
391                                          !vc5->prog.fs->prog_data.fs->writes_z);
392                                 config.depth_test_function =
393                                         vc5->zsa->base.depth.func;
394                         } else {
395                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
396                         }
397 
398                         config.stencil_enable =
399                                 vc5->zsa->base.stencil[0].enabled;
400                 }
401 
402         }
403 
404         if (vc5->dirty & VC5_DIRTY_RASTERIZER &&
405             vc5->rasterizer->base.offset_tri) {
406                 cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
407                         depth.depth_offset_factor =
408                                 vc5->rasterizer->offset_factor;
409                         depth.depth_offset_units =
410                                 vc5->rasterizer->offset_units;
411                 }
412         }
413 
414         if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
415                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
416                         point_size.point_size = vc5->rasterizer->point_size;
417                 }
418 
419                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
420                         line_width.line_width = vc5->rasterizer->base.line_width;
421                 }
422         }
423 
424         if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
425                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
426                         clip.viewport_half_width_in_1_256th_of_pixel =
427                                 vc5->viewport.scale[0] * 256.0f;
428                         clip.viewport_half_height_in_1_256th_of_pixel =
429                                 vc5->viewport.scale[1] * 256.0f;
430                 }
431 
432                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
433                         clip.viewport_z_offset_zc_to_zs =
434                                 vc5->viewport.translate[2];
435                         clip.viewport_z_scale_zc_to_zs =
436                                 vc5->viewport.scale[2];
437                 }
438                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
439                         clip.minimum_zw = (vc5->viewport.translate[2] -
440                                            vc5->viewport.scale[2]);
441                         clip.maximum_zw = (vc5->viewport.translate[2] +
442                                            vc5->viewport.scale[2]);
443                 }
444 
445                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
446                         vp.viewport_centre_x_coordinate =
447                                 vc5->viewport.translate[0];
448                         vp.viewport_centre_y_coordinate =
449                                 vc5->viewport.translate[1];
450                 }
451         }
452 
453         if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) {
454                 struct pipe_blend_state *blend = vc5->blend;
455 
456                 if (blend->independent_blend_enable) {
457                         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
458                                 emit_rt_blend(vc5, job, blend, i);
459                 } else {
460                         emit_rt_blend(vc5, job, blend, 0);
461                 }
462         }
463 
464         if (vc5->dirty & VC5_DIRTY_BLEND) {
465                 struct pipe_blend_state *blend = vc5->blend;
466 
467                 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
468                         if (blend->independent_blend_enable) {
469                                 mask.render_target_0_per_colour_component_write_masks =
470                                         translate_colormask(vc5, blend->rt[0].colormask, 0);
471                                 mask.render_target_1_per_colour_component_write_masks =
472                                         translate_colormask(vc5, blend->rt[1].colormask, 1);
473                                 mask.render_target_2_per_colour_component_write_masks =
474                                         translate_colormask(vc5, blend->rt[2].colormask, 2);
475                                 mask.render_target_3_per_colour_component_write_masks =
476                                         translate_colormask(vc5, blend->rt[3].colormask, 3);
477                         } else {
478                                 mask.render_target_0_per_colour_component_write_masks =
479                                         translate_colormask(vc5, blend->rt[0].colormask, 0);
480                                 mask.render_target_1_per_colour_component_write_masks =
481                                         translate_colormask(vc5, blend->rt[0].colormask, 1);
482                                 mask.render_target_2_per_colour_component_write_masks =
483                                         translate_colormask(vc5, blend->rt[0].colormask, 2);
484                                 mask.render_target_3_per_colour_component_write_masks =
485                                         translate_colormask(vc5, blend->rt[0].colormask, 3);
486                         }
487                 }
488         }
489 
490         if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
491                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
492                         colour.red_f16 = (vc5->swap_color_rb ?
493                                           vc5->blend_color.hf[2] :
494                                           vc5->blend_color.hf[0]);
495                         colour.green_f16 = vc5->blend_color.hf[1];
496                         colour.blue_f16 = (vc5->swap_color_rb ?
497                                            vc5->blend_color.hf[0] :
498                                            vc5->blend_color.hf[2]);
499                         colour.alpha_f16 = vc5->blend_color.hf[3];
500                 }
501         }
502 
503         if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
504                 struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
505                 struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
506 
507                 if (front->enabled) {
508                         cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
509                                                vc5->zsa->stencil_front, config) {
510                                 config.stencil_ref_value =
511                                         vc5->stencil_ref.ref_value[0];
512                         }
513                 }
514 
515                 if (back->enabled) {
516                         cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
517                                                vc5->zsa->stencil_back, config) {
518                                 config.stencil_ref_value =
519                                         vc5->stencil_ref.ref_value[1];
520                         }
521                 }
522         }
523 
524 #if V3D_VERSION < 40
525         /* Pre-4.x, we have texture state that depends on both the sampler and
526          * the view, so we merge them together at draw time.
527          */
528         if (vc5->dirty & VC5_DIRTY_FRAGTEX)
529                 emit_textures(vc5, &vc5->fragtex);
530 
531         if (vc5->dirty & VC5_DIRTY_VERTTEX)
532                 emit_textures(vc5, &vc5->verttex);
533 #endif
534 
535         if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
536                 bool emitted_any = false;
537 
538                 for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->flat_shade_flags); i++) {
539                         if (!vc5->prog.fs->prog_data.fs->flat_shade_flags[i])
540                                 continue;
541 
542                         cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
543                                 flags.varying_offset_v0 = i;
544 
545                                 if (emitted_any) {
546                                         flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
547                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED;
548                                         flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
549                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED;
550                                 } else {
551                                         flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
552                                                 ((i == 0) ?
553                                                  V3D_VARYING_FLAGS_ACTION_UNCHANGED :
554                                                  V3D_VARYING_FLAGS_ACTION_ZEROED);
555 
556                                         flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
557                                                 V3D_VARYING_FLAGS_ACTION_ZEROED;
558                                 }
559 
560                                 flags.flat_shade_flags_for_varyings_v024 =
561                                         vc5->prog.fs->prog_data.fs->flat_shade_flags[i];
562                         }
563 
564                         emitted_any = true;
565                 }
566 
567                 if (!emitted_any) {
568                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
569                 }
570         }
571 
572         if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
573                 struct vc5_streamout_stateobj *so = &vc5->streamout;
574 
575                 if (so->num_targets) {
576 #if V3D_VERSION >= 40
577                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
578                                 tfe.number_of_16_bit_output_data_specs_following =
579                                         vc5->prog.bind_vs->num_tf_specs;
580                                 tfe.enable =
581                                         (vc5->prog.bind_vs->num_tf_specs != 0 &&
582                                          vc5->active_queries);
583                         };
584 #else /* V3D_VERSION < 40 */
585                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
586                                 tfe.number_of_32_bit_output_buffer_address_following =
587                                         so->num_targets;
588                                 tfe.number_of_16_bit_output_data_specs_following =
589                                         vc5->prog.bind_vs->num_tf_specs;
590                         };
591 #endif /* V3D_VERSION < 40 */
592                         for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
593                                 cl_emit_prepacked(&job->bcl,
594                                                   &vc5->prog.bind_vs->tf_specs[i]);
595                         }
596 
597                         for (int i = 0; i < so->num_targets; i++) {
598                                 const struct pipe_stream_output_target *target =
599                                         so->targets[i];
600                                 struct vc5_resource *rsc =
601                                         vc5_resource(target->buffer);
602 
603 #if V3D_VERSION >= 40
604                                 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
605                                         output.buffer_address =
606                                                 cl_address(rsc->bo,
607                                                            target->buffer_offset);
608                                         output.buffer_size_in_32_bit_words =
609                                                 target->buffer_size >> 2;
610                                         output.buffer_number = i;
611                                 }
612 #else /* V3D_VERSION < 40 */
613                                 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
614                                         output.address =
615                                                 cl_address(rsc->bo,
616                                                            target->buffer_offset);
617                                 };
618 #endif /* V3D_VERSION < 40 */
619                                 vc5_job_add_write_resource(vc5->job,
620                                                            target->buffer);
621                                 /* XXX: buffer_size? */
622                         }
623                 } else {
624                         /* XXX? */
625                 }
626         }
627 
628         if (vc5->dirty & VC5_DIRTY_OQ) {
629                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
630                         job->oq_enabled = vc5->active_queries && vc5->current_oq;
631                         if (job->oq_enabled) {
632                                 counter.address = cl_address(vc5->current_oq, 0);
633                         }
634                 }
635         }
636 }
637