1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/format/u_format.h"
25 #include "util/half_float.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/compiler/v3d_compiler.h"
30 
31 static uint8_t
v3d_factor(enum pipe_blendfactor factor,bool dst_alpha_one)32 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
33 {
34         /* We may get a bad blendfactor when blending is disabled. */
35         if (factor == 0)
36                 return V3D_BLEND_FACTOR_ZERO;
37 
38         switch (factor) {
39         case PIPE_BLENDFACTOR_ZERO:
40                 return V3D_BLEND_FACTOR_ZERO;
41         case PIPE_BLENDFACTOR_ONE:
42                 return V3D_BLEND_FACTOR_ONE;
43         case PIPE_BLENDFACTOR_SRC_COLOR:
44                 return V3D_BLEND_FACTOR_SRC_COLOR;
45         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
46                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
47         case PIPE_BLENDFACTOR_DST_COLOR:
48                 return V3D_BLEND_FACTOR_DST_COLOR;
49         case PIPE_BLENDFACTOR_INV_DST_COLOR:
50                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
51         case PIPE_BLENDFACTOR_SRC_ALPHA:
52                 return V3D_BLEND_FACTOR_SRC_ALPHA;
53         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
54                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
55         case PIPE_BLENDFACTOR_DST_ALPHA:
56                 return (dst_alpha_one ?
57                         V3D_BLEND_FACTOR_ONE :
58                         V3D_BLEND_FACTOR_DST_ALPHA);
59         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
60                 return (dst_alpha_one ?
61                         V3D_BLEND_FACTOR_ZERO :
62                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
63         case PIPE_BLENDFACTOR_CONST_COLOR:
64                 return V3D_BLEND_FACTOR_CONST_COLOR;
65         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
66                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
67         case PIPE_BLENDFACTOR_CONST_ALPHA:
68                 return V3D_BLEND_FACTOR_CONST_ALPHA;
69         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
70                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
71         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
72                 return (dst_alpha_one ?
73                         V3D_BLEND_FACTOR_ZERO :
74                         V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
75         default:
76                 unreachable("Bad blend factor");
77         }
78 }
79 
80 static inline uint16_t
swizzled_border_color(const struct v3d_device_info * devinfo,struct pipe_sampler_state * sampler,struct v3d_sampler_view * sview,int chan)81 swizzled_border_color(const struct v3d_device_info *devinfo,
82                       struct pipe_sampler_state *sampler,
83                       struct v3d_sampler_view *sview,
84                       int chan)
85 {
86         const struct util_format_description *desc =
87                 util_format_description(sview->base.format);
88         uint8_t swiz = chan;
89 
90         /* If we're doing swizzling in the sampler, then only rearrange the
91          * border color for the mismatch between the VC5 texture format and
92          * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
93          * the sampler's swizzle.
94          *
95          * For swizzling in the shader, we don't do any pre-swizzling of the
96          * border color.
97          */
98         if (v3d_get_tex_return_size(devinfo, sview->base.format,
99                                     sampler->compare_mode) != 32)
100                 swiz = desc->swizzle[swiz];
101 
102         switch (swiz) {
103         case PIPE_SWIZZLE_0:
104                 return _mesa_float_to_half(0.0);
105         case PIPE_SWIZZLE_1:
106                 return _mesa_float_to_half(1.0);
107         default:
108                 return _mesa_float_to_half(sampler->border_color.f[swiz]);
109         }
110 }
111 
112 #if V3D_VERSION < 40
113 static uint32_t
translate_swizzle(unsigned char pipe_swizzle)114 translate_swizzle(unsigned char pipe_swizzle)
115 {
116         switch (pipe_swizzle) {
117         case PIPE_SWIZZLE_0:
118                 return 0;
119         case PIPE_SWIZZLE_1:
120                 return 1;
121         case PIPE_SWIZZLE_X:
122         case PIPE_SWIZZLE_Y:
123         case PIPE_SWIZZLE_Z:
124         case PIPE_SWIZZLE_W:
125                 return 2 + pipe_swizzle;
126         default:
127                 unreachable("unknown swizzle");
128         }
129 }
130 
131 static void
emit_one_texture(struct v3d_context * v3d,struct v3d_texture_stateobj * stage_tex,int i)132 emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
133                  int i)
134 {
135         struct v3d_job *job = v3d->job;
136         struct pipe_sampler_state *psampler = stage_tex->samplers[i];
137         struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
138         struct pipe_sampler_view *psview = stage_tex->textures[i];
139         struct v3d_sampler_view *sview = v3d_sampler_view(psview);
140         struct pipe_resource *prsc = psview->texture;
141         struct v3d_resource *rsc = v3d_resource(prsc);
142         const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
143 
144         stage_tex->texture_state[i].offset =
145                 v3d_cl_ensure_space(&job->indirect,
146                                     cl_packet_length(TEXTURE_SHADER_STATE),
147                                     32);
148         v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
149                              job->indirect.bo);
150 
151         uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
152                                                        psampler->compare_mode);
153 
154         struct V3D33_TEXTURE_SHADER_STATE unpacked = {
155                 /* XXX */
156                 .border_color_red = swizzled_border_color(devinfo, psampler,
157                                                           sview, 0),
158                 .border_color_green = swizzled_border_color(devinfo, psampler,
159                                                             sview, 1),
160                 .border_color_blue = swizzled_border_color(devinfo, psampler,
161                                                            sview, 2),
162                 .border_color_alpha = swizzled_border_color(devinfo, psampler,
163                                                             sview, 3),
164 
165                 /* In the normal texturing path, the LOD gets clamped between
166                  * min/max, and the base_level field (set in the sampler view
167                  * from first_level) only decides where the min/mag switch
168                  * happens, so we need to use the LOD clamps to keep us
169                  * between min and max.
170                  *
171                  * For txf, the LOD clamp is still used, despite GL not
172                  * wanting that.  We will need to have a separate
173                  * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
174                  * support txf properly.
175                  */
176                 .min_level_of_detail = MIN2(psview->u.tex.first_level +
177                                             MAX2(psampler->min_lod, 0),
178                                             psview->u.tex.last_level),
179                 .max_level_of_detail = MIN2(psview->u.tex.first_level +
180                                             psampler->max_lod,
181                                             psview->u.tex.last_level),
182 
183                 .texture_base_pointer = cl_address(rsc->bo,
184                                                    rsc->slices[0].offset),
185 
186                 .output_32_bit = return_size == 32,
187         };
188 
189         /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
190          * 32-bit, we leave swizzling up to the shader compiler.
191          *
192          * Note: Contrary to the docs, the swizzle still applies even if the
193          * return size is 32.  It's just that you probably want to swizzle in
194          * the shader, because you need the Y/Z/W channels to be defined.
195          */
196         if (return_size == 32) {
197                 unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
198                 unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
199                 unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
200                 unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
201         } else {
202                 unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);
203                 unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);
204                 unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);
205                 unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);
206         }
207 
208         int min_img_filter = psampler->min_img_filter;
209         int min_mip_filter = psampler->min_mip_filter;
210         int mag_img_filter = psampler->mag_img_filter;
211 
212         if (return_size == 32) {
213                 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
214                 min_img_filter = PIPE_TEX_FILTER_NEAREST;
215                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
216         }
217 
218         bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
219         switch (min_mip_filter) {
220         case PIPE_TEX_MIPFILTER_NONE:
221                 unpacked.filter += min_nearest ? 2 : 0;
222                 break;
223         case PIPE_TEX_MIPFILTER_NEAREST:
224                 unpacked.filter += min_nearest ? 4 : 8;
225                 break;
226         case PIPE_TEX_MIPFILTER_LINEAR:
227                 unpacked.filter += min_nearest ? 4 : 8;
228                 unpacked.filter += 2;
229                 break;
230         }
231 
232         if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
233                 unpacked.filter++;
234 
235         if (psampler->max_anisotropy > 8)
236                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
237         else if (psampler->max_anisotropy > 4)
238                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
239         else if (psampler->max_anisotropy > 2)
240                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
241         else if (psampler->max_anisotropy)
242                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
243 
244         uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
245         cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
246 
247         for (int i = 0; i < ARRAY_SIZE(packed); i++)
248                 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
249 
250         /* TMU indirect structs need to be 32b aligned. */
251         v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
252         cl_emit_prepacked(&job->indirect, &packed);
253 }
254 
255 static void
emit_textures(struct v3d_context * v3d,struct v3d_texture_stateobj * stage_tex)256 emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
257 {
258         for (int i = 0; i < stage_tex->num_textures; i++) {
259                 if (stage_tex->textures[i])
260                         emit_one_texture(v3d, stage_tex, i);
261         }
262 }
263 #endif /* V3D_VERSION < 40 */
264 
265 static uint32_t
translate_colormask(struct v3d_context * v3d,uint32_t colormask,int rt)266 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
267 {
268         if (v3d->swap_color_rb & (1 << rt)) {
269                 colormask = ((colormask & (2 | 8)) |
270                              ((colormask & 1) << 2) |
271                              ((colormask & 4) >> 2));
272         }
273 
274         return (~colormask) & 0xf;
275 }
276 
277 static void
emit_rt_blend(struct v3d_context * v3d,struct v3d_job * job,struct pipe_blend_state * blend,int rt)278 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
279               struct pipe_blend_state *blend, int rt)
280 {
281         struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
282 
283 #if V3D_VERSION >= 40
284         /* We don't need to emit blend state for disabled RTs. */
285         if (!rtblend->blend_enable)
286                 return;
287 #endif
288 
289         cl_emit(&job->bcl, BLEND_CFG, config) {
290 #if V3D_VERSION >= 40
291                 if (blend->independent_blend_enable)
292                         config.render_target_mask = 1 << rt;
293                 else
294                         config.render_target_mask = (1 << V3D_MAX_DRAW_BUFFERS) - 1;
295 #else
296                 assert(rt == 0);
297 #endif
298 
299                 config.color_blend_mode = rtblend->rgb_func;
300                 config.color_blend_dst_factor =
301                         v3d_factor(rtblend->rgb_dst_factor,
302                                    v3d->blend_dst_alpha_one);
303                 config.color_blend_src_factor =
304                         v3d_factor(rtblend->rgb_src_factor,
305                                    v3d->blend_dst_alpha_one);
306 
307                 config.alpha_blend_mode = rtblend->alpha_func;
308                 config.alpha_blend_dst_factor =
309                         v3d_factor(rtblend->alpha_dst_factor,
310                                    v3d->blend_dst_alpha_one);
311                 config.alpha_blend_src_factor =
312                         v3d_factor(rtblend->alpha_src_factor,
313                                    v3d->blend_dst_alpha_one);
314         }
315 }
316 
317 static void
emit_flat_shade_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)318 emit_flat_shade_flags(struct v3d_job *job,
319                       int varying_offset,
320                       uint32_t varyings,
321                       enum V3DX(Varying_Flags_Action) lower,
322                       enum V3DX(Varying_Flags_Action) higher)
323 {
324         cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
325                 flags.varying_offset_v0 = varying_offset;
326                 flags.flat_shade_flags_for_varyings_v024 = varyings;
327                 flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
328                         lower;
329                 flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
330                         higher;
331         }
332 }
333 
334 #if V3D_VERSION >= 40
335 static void
emit_noperspective_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)336 emit_noperspective_flags(struct v3d_job *job,
337                          int varying_offset,
338                          uint32_t varyings,
339                          enum V3DX(Varying_Flags_Action) lower,
340                          enum V3DX(Varying_Flags_Action) higher)
341 {
342         cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
343                 flags.varying_offset_v0 = varying_offset;
344                 flags.non_perspective_flags_for_varyings_v024 = varyings;
345                 flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
346                         lower;
347                 flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
348                         higher;
349         }
350 }
351 
352 static void
emit_centroid_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)353 emit_centroid_flags(struct v3d_job *job,
354                     int varying_offset,
355                     uint32_t varyings,
356                     enum V3DX(Varying_Flags_Action) lower,
357                     enum V3DX(Varying_Flags_Action) higher)
358 {
359         cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
360                 flags.varying_offset_v0 = varying_offset;
361                 flags.centroid_flags_for_varyings_v024 = varyings;
362                 flags.action_for_centroid_flags_of_lower_numbered_varyings =
363                         lower;
364                 flags.action_for_centroid_flags_of_higher_numbered_varyings =
365                         higher;
366         }
367 }
368 #endif /* V3D_VERSION >= 40 */
369 
370 static bool
emit_varying_flags(struct v3d_job * job,uint32_t * flags,void (* flag_emit_callback)(struct v3d_job * job,int varying_offset,uint32_t flags,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher))371 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
372                    void (*flag_emit_callback)(struct v3d_job *job,
373                                               int varying_offset,
374                                               uint32_t flags,
375                                               enum V3DX(Varying_Flags_Action) lower,
376                                               enum V3DX(Varying_Flags_Action) higher))
377 {
378         struct v3d_context *v3d = job->v3d;
379         bool emitted_any = false;
380 
381         for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
382                 if (!flags[i])
383                         continue;
384 
385                 if (emitted_any) {
386                         flag_emit_callback(job, i, flags[i],
387                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
388                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED);
389                 } else if (i == 0) {
390                         flag_emit_callback(job, i, flags[i],
391                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
392                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
393                 } else {
394                         flag_emit_callback(job, i, flags[i],
395                                            V3D_VARYING_FLAGS_ACTION_ZEROED,
396                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
397                 }
398                 emitted_any = true;
399         }
400 
401         return emitted_any;
402 }
403 
404 static inline struct v3d_uncompiled_shader *
get_tf_shader(struct v3d_context * v3d)405 get_tf_shader(struct v3d_context *v3d)
406 {
407         if (v3d->prog.bind_gs)
408                 return v3d->prog.bind_gs;
409         else
410                 return v3d->prog.bind_vs;
411 }
412 
413 void
v3dX(emit_state)414 v3dX(emit_state)(struct pipe_context *pctx)
415 {
416         struct v3d_context *v3d = v3d_context(pctx);
417         struct v3d_job *job = v3d->job;
418         bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
419 
420         if (v3d->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
421                           VC5_DIRTY_RASTERIZER)) {
422                 float *vpscale = v3d->viewport.scale;
423                 float *vptranslate = v3d->viewport.translate;
424                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
425                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
426                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
427                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
428 
429                 /* Clip to the scissor if it's enabled, but still clip to the
430                  * drawable regardless since that controls where the binner
431                  * tries to put things.
432                  *
433                  * Additionally, always clip the rendering to the viewport,
434                  * since the hardware does guardband clipping, meaning
435                  * primitives would rasterize outside of the view volume.
436                  */
437                 uint32_t minx, miny, maxx, maxy;
438                 if (!v3d->rasterizer->base.scissor) {
439                         minx = MAX2(vp_minx, 0);
440                         miny = MAX2(vp_miny, 0);
441                         maxx = MIN2(vp_maxx, job->draw_width);
442                         maxy = MIN2(vp_maxy, job->draw_height);
443                 } else {
444                         minx = MAX2(vp_minx, v3d->scissor.minx);
445                         miny = MAX2(vp_miny, v3d->scissor.miny);
446                         maxx = MIN2(vp_maxx, v3d->scissor.maxx);
447                         maxy = MIN2(vp_maxy, v3d->scissor.maxy);
448                 }
449 
450                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
451                         clip.clip_window_left_pixel_coordinate = minx;
452                         clip.clip_window_bottom_pixel_coordinate = miny;
453                         if (maxx > minx && maxy > miny) {
454                                 clip.clip_window_width_in_pixels = maxx - minx;
455                                 clip.clip_window_height_in_pixels = maxy - miny;
456                         } else if (V3D_VERSION < 41) {
457                                 /* The HW won't entirely clip out when scissor
458                                  * w/h is 0.  Just treat it the same as
459                                  * rasterizer discard.
460                                  */
461                                 rasterizer_discard = true;
462                                 clip.clip_window_width_in_pixels = 1;
463                                 clip.clip_window_height_in_pixels = 1;
464                         }
465                 }
466 
467                 job->draw_min_x = MIN2(job->draw_min_x, minx);
468                 job->draw_min_y = MIN2(job->draw_min_y, miny);
469                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
470                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
471         }
472 
473         if (v3d->dirty & (VC5_DIRTY_RASTERIZER |
474                           VC5_DIRTY_ZSA |
475                           VC5_DIRTY_BLEND |
476                           VC5_DIRTY_COMPILED_FS)) {
477                 cl_emit(&job->bcl, CFG_BITS, config) {
478                         config.enable_forward_facing_primitive =
479                                 !rasterizer_discard &&
480                                 !(v3d->rasterizer->base.cull_face &
481                                   PIPE_FACE_FRONT);
482                         config.enable_reverse_facing_primitive =
483                                 !rasterizer_discard &&
484                                 !(v3d->rasterizer->base.cull_face &
485                                   PIPE_FACE_BACK);
486                         /* This seems backwards, but it's what gets the
487                          * clipflat test to pass.
488                          */
489                         config.clockwise_primitives =
490                                 v3d->rasterizer->base.front_ccw;
491 
492                         config.enable_depth_offset =
493                                 v3d->rasterizer->base.offset_tri;
494 
495                         /* V3D follows GL behavior where the sample mask only
496                          * applies when MSAA is enabled.  Gallium has sample
497                          * mask apply anyway, and the MSAA blit shaders will
498                          * set sample mask without explicitly setting
499                          * rasterizer oversample.  Just force it on here,
500                          * since the blit shaders are the only way to have
501                          * !multisample && samplemask != 0xf.
502                          */
503                         config.rasterizer_oversample_mode =
504                                 v3d->rasterizer->base.multisample ||
505                                 v3d->sample_mask != 0xf;
506 
507                         config.direct3d_provoking_vertex =
508                                 v3d->rasterizer->base.flatshade_first;
509 
510                         config.blend_enable = v3d->blend->blend_enables;
511 
512                         /* Note: EZ state may update based on the compiled FS,
513                          * along with ZSA
514                          */
515                         config.early_z_updates_enable =
516                                 (job->ez_state != VC5_EZ_DISABLED);
517                         if (v3d->zsa->base.depth.enabled) {
518                                 config.z_updates_enable =
519                                         v3d->zsa->base.depth.writemask;
520                                 config.early_z_enable =
521                                         config.early_z_updates_enable;
522                                 config.depth_test_function =
523                                         v3d->zsa->base.depth.func;
524                         } else {
525                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
526                         }
527 
528                         config.stencil_enable =
529                                 v3d->zsa->base.stencil[0].enabled;
530 
531                         /* Use nicer line caps when line smoothing is
532                          * enabled
533                          */
534                         config.line_rasterization =
535                                 v3d_line_smoothing_enabled(v3d) ? 1 : 0;
536                 }
537 
538         }
539 
540         if (v3d->dirty & VC5_DIRTY_RASTERIZER &&
541             v3d->rasterizer->base.offset_tri) {
542                 if (job->zsbuf &&
543                     job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
544                         cl_emit_prepacked_sized(&job->bcl,
545                                                 v3d->rasterizer->depth_offset_z16,
546                                                 cl_packet_length(DEPTH_OFFSET));
547                 } else {
548                         cl_emit_prepacked_sized(&job->bcl,
549                                                 v3d->rasterizer->depth_offset,
550                                                 cl_packet_length(DEPTH_OFFSET));
551                 }
552         }
553 
554         if (v3d->dirty & VC5_DIRTY_RASTERIZER) {
555                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
556                         point_size.point_size = v3d->rasterizer->point_size;
557                 }
558 
559                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
560                         line_width.line_width = v3d_get_real_line_width(v3d);
561                 }
562         }
563 
564         if (v3d->dirty & VC5_DIRTY_VIEWPORT) {
565                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
566                         clip.viewport_half_width_in_1_256th_of_pixel =
567                                 v3d->viewport.scale[0] * 256.0f;
568                         clip.viewport_half_height_in_1_256th_of_pixel =
569                                 v3d->viewport.scale[1] * 256.0f;
570                 }
571 
572                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
573                         clip.viewport_z_offset_zc_to_zs =
574                                 v3d->viewport.translate[2];
575                         clip.viewport_z_scale_zc_to_zs =
576                                 v3d->viewport.scale[2];
577                 }
578                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
579                         float z1 = (v3d->viewport.translate[2] -
580                                     v3d->viewport.scale[2]);
581                         float z2 = (v3d->viewport.translate[2] +
582                                     v3d->viewport.scale[2]);
583                         clip.minimum_zw = MIN2(z1, z2);
584                         clip.maximum_zw = MAX2(z1, z2);
585                 }
586 
587                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
588                         vp.viewport_centre_x_coordinate =
589                                 v3d->viewport.translate[0];
590                         vp.viewport_centre_y_coordinate =
591                                 v3d->viewport.translate[1];
592                 }
593         }
594 
595         if (v3d->dirty & VC5_DIRTY_BLEND) {
596                 struct v3d_blend_state *blend = v3d->blend;
597 
598                 if (blend->blend_enables) {
599 #if V3D_VERSION >= 40
600                         cl_emit(&job->bcl, BLEND_ENABLES, enables) {
601                                 enables.mask = blend->blend_enables;
602                         }
603 #endif
604 
605                         if (blend->base.independent_blend_enable) {
606                                 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)
607                                         emit_rt_blend(v3d, job, &blend->base, i);
608                         } else {
609                                 emit_rt_blend(v3d, job, &blend->base, 0);
610                         }
611                 }
612         }
613 
614         if (v3d->dirty & VC5_DIRTY_BLEND) {
615                 struct pipe_blend_state *blend = &v3d->blend->base;
616 
617                 cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
618                         for (int i = 0; i < 4; i++) {
619                                 int rt = blend->independent_blend_enable ? i : 0;
620                                 int rt_mask = blend->rt[rt].colormask;
621 
622                                 mask.mask |= translate_colormask(v3d, rt_mask,
623                                                                  i) << (4 * i);
624                         }
625                 }
626         }
627 
628         /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
629          * color.
630          */
631         if (v3d->dirty & VC5_DIRTY_BLEND_COLOR ||
632             (V3D_VERSION < 41 && (v3d->dirty & VC5_DIRTY_BLEND))) {
633                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
634                         color.red_f16 = (v3d->swap_color_rb ?
635                                           v3d->blend_color.hf[2] :
636                                           v3d->blend_color.hf[0]);
637                         color.green_f16 = v3d->blend_color.hf[1];
638                         color.blue_f16 = (v3d->swap_color_rb ?
639                                            v3d->blend_color.hf[0] :
640                                            v3d->blend_color.hf[2]);
641                         color.alpha_f16 = v3d->blend_color.hf[3];
642                 }
643         }
644 
645         if (v3d->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
646                 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
647                 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
648 
649                 if (front->enabled) {
650                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
651                                                v3d->zsa->stencil_front, config) {
652                                 config.stencil_ref_value =
653                                         v3d->stencil_ref.ref_value[0];
654                         }
655                 }
656 
657                 if (back->enabled) {
658                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
659                                                v3d->zsa->stencil_back, config) {
660                                 config.stencil_ref_value =
661                                         v3d->stencil_ref.ref_value[1];
662                         }
663                 }
664         }
665 
666 #if V3D_VERSION < 40
667         /* Pre-4.x, we have texture state that depends on both the sampler and
668          * the view, so we merge them together at draw time.
669          */
670         if (v3d->dirty & VC5_DIRTY_FRAGTEX)
671                 emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
672 
673         if (v3d->dirty & VC5_DIRTY_GEOMTEX)
674                 emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
675 
676         if (v3d->dirty & VC5_DIRTY_VERTTEX)
677                 emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
678 #endif
679 
680         if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
681                 if (!emit_varying_flags(job,
682                                         v3d->prog.fs->prog_data.fs->flat_shade_flags,
683                                         emit_flat_shade_flags)) {
684                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
685                 }
686         }
687 
688 #if V3D_VERSION >= 40
689         if (v3d->dirty & VC5_DIRTY_NOPERSPECTIVE_FLAGS) {
690                 if (!emit_varying_flags(job,
691                                         v3d->prog.fs->prog_data.fs->noperspective_flags,
692                                         emit_noperspective_flags)) {
693                         cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
694                 }
695         }
696 
697         if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
698                 if (!emit_varying_flags(job,
699                                         v3d->prog.fs->prog_data.fs->centroid_flags,
700                                         emit_centroid_flags)) {
701                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
702                 }
703         }
704 #endif
705 
706         /* Set up the transform feedback data specs (which VPM entries to
707          * output to which buffers).
708          */
709         if (v3d->dirty & (VC5_DIRTY_STREAMOUT |
710                           VC5_DIRTY_RASTERIZER |
711                           VC5_DIRTY_PRIM_MODE)) {
712                 struct v3d_streamout_stateobj *so = &v3d->streamout;
713                 if (so->num_targets) {
714                         bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
715                                                 v3d->rasterizer->base.point_size_per_vertex);
716                         struct v3d_uncompiled_shader *tf_shader =
717                                 get_tf_shader(v3d);
718                         uint16_t *tf_specs = (psiz_per_vertex ?
719                                               tf_shader->tf_specs_psiz :
720                                               tf_shader->tf_specs);
721 
722 #if V3D_VERSION >= 40
723                         bool tf_enabled = v3d_transform_feedback_enabled(v3d);
724                         job->tf_enabled |= tf_enabled;
725 
726                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
727                                 tfe.number_of_16_bit_output_data_specs_following =
728                                         tf_shader->num_tf_specs;
729                                 tfe.enable = tf_enabled;
730                         };
731 #else /* V3D_VERSION < 40 */
732                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
733                                 tfe.number_of_32_bit_output_buffer_address_following =
734                                         so->num_targets;
735                                 tfe.number_of_16_bit_output_data_specs_following =
736                                         tf_shader->num_tf_specs;
737                         };
738 #endif /* V3D_VERSION < 40 */
739                         for (int i = 0; i < tf_shader->num_tf_specs; i++) {
740                                 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
741                         }
742                 } else {
743 #if V3D_VERSION >= 40
744                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
745                                 tfe.enable = false;
746                         };
747 #endif /* V3D_VERSION >= 40 */
748                 }
749         }
750 
751         /* Set up the transform feedback buffers. */
752         if (v3d->dirty & VC5_DIRTY_STREAMOUT) {
753                 struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
754                 struct v3d_streamout_stateobj *so = &v3d->streamout;
755                 for (int i = 0; i < so->num_targets; i++) {
756                         const struct pipe_stream_output_target *target =
757                                 so->targets[i];
758                         struct v3d_resource *rsc = target ?
759                                 v3d_resource(target->buffer) : NULL;
760                         struct pipe_shader_state *ss = &tf_shader->base;
761                         struct pipe_stream_output_info *info = &ss->stream_output;
762                         uint32_t offset = (v3d->streamout.offsets[i] *
763                                            info->stride[i] * 4);
764 
765 #if V3D_VERSION >= 40
766                         if (!target)
767                                 continue;
768 
769                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
770                                 output.buffer_address =
771                                         cl_address(rsc->bo,
772                                                    target->buffer_offset +
773                                                    offset);
774                                 output.buffer_size_in_32_bit_words =
775                                         (target->buffer_size - offset) >> 2;
776                                 output.buffer_number = i;
777                         }
778 #else /* V3D_VERSION < 40 */
779                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
780                                 if (target) {
781                                         output.address =
782                                                 cl_address(rsc->bo,
783                                                            target->buffer_offset +
784                                                            offset);
785                                 }
786                         };
787 #endif /* V3D_VERSION < 40 */
788                         if (target) {
789                                 v3d_job_add_tf_write_resource(v3d->job,
790                                                               target->buffer);
791                         }
792                         /* XXX: buffer_size? */
793                 }
794         }
795 
796         if (v3d->dirty & VC5_DIRTY_OQ) {
797                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
798                         if (v3d->active_queries && v3d->current_oq) {
799                                 counter.address = cl_address(v3d->current_oq, 0);
800                         }
801                 }
802         }
803 
804 #if V3D_VERSION >= 40
805         if (v3d->dirty & VC5_DIRTY_SAMPLE_STATE) {
806                 cl_emit(&job->bcl, SAMPLE_STATE, state) {
807                         /* Note: SampleCoverage was handled at the
808                          * frontend level by converting to sample_mask.
809                          */
810                         state.coverage = 1.0;
811                         state.mask = job->msaa ? v3d->sample_mask : 0xf;
812                 }
813         }
814 #endif
815 }
816