1 /*
2  * Copyright © 2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/u_format.h"
25 #include "vc5_context.h"
26 #include "vc5_tiling.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 
/* Mask covering the clear bits of the four color buffers
 * (PIPE_CLEAR_COLOR0 through PIPE_CLEAR_COLOR3).
 *
 * The final line deliberately has no trailing backslash, so the macro body
 * ends here and cannot absorb whatever line follows it.
 */
#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 |                   \
                                  PIPE_CLEAR_COLOR1 |                   \
                                  PIPE_CLEAR_COLOR2 |                   \
                                  PIPE_CLEAR_COLOR3)

/* Zero-based position of the PIPE_CLEAR_COLOR0 bit, used to shift the color
 * clear bits down so that color buffer 0 lands in bit 0.
 */
#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)
36 
37 static void
load_general(struct vc5_cl * cl,struct pipe_surface * psurf,int buffer)38 load_general(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer)
39 {
40         struct vc5_surface *surf = vc5_surface(psurf);
41         struct vc5_resource *rsc = vc5_resource(psurf->texture);
42 
43         struct vc5_resource *separate_stencil = NULL;
44         if (rsc->separate_stencil && buffer == STENCIL)
45                 separate_stencil = rsc->separate_stencil;
46 
47         cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
48                 load.buffer_to_load = buffer;
49                 if (separate_stencil) {
50                         load.address = cl_address(separate_stencil->bo,
51                                                   surf->separate_stencil_offset);
52                 } else {
53                         load.address = cl_address(rsc->bo, surf->offset);
54                 }
55 
56 #if V3D_VERSION >= 40
57                 if (separate_stencil) {
58                         load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
59                         load.memory_format = surf->separate_stencil_tiling;
60                 }  else {
61                         load.input_image_format = surf->format;
62                         load.memory_format = surf->tiling;
63                 }
64 
65                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
66                     surf->tiling == VC5_TILING_UIF_XOR) {
67                         if (separate_stencil) {
68                                 load.height_in_ub_or_stride =
69                                         surf->separate_stencil_padded_height_of_output_image_in_uif_blocks;
70                         } else {
71                                 load.height_in_ub_or_stride =
72                                         surf->padded_height_of_output_image_in_uif_blocks;
73                         }
74                 } else if (surf->tiling == VC5_TILING_RASTER) {
75                         struct vc5_resource_slice *slice =
76                                 &rsc->slices[psurf->u.tex.level];
77                         load.height_in_ub_or_stride = slice->stride;
78                 }
79 
80                 /* XXX: MSAA */
81 #else /* V3D_VERSION < 40 */
82                 load.raw_mode = true;
83                 load.padded_height_of_output_image_in_uif_blocks =
84                         surf->padded_height_of_output_image_in_uif_blocks;
85 #endif /* V3D_VERSION < 40 */
86         }
87 }
88 
89 static void
store_general(struct vc5_job * job,struct vc5_cl * cl,struct pipe_surface * psurf,int buffer,int pipe_bit,bool last_store,bool general_color_clear)90 store_general(struct vc5_job *job,
91               struct vc5_cl *cl, struct pipe_surface *psurf, int buffer,
92               int pipe_bit, bool last_store, bool general_color_clear)
93 {
94         struct vc5_surface *surf = vc5_surface(psurf);
95         struct vc5_resource *rsc = vc5_resource(psurf->texture);
96 
97         struct vc5_resource *separate_stencil = NULL;
98         if (rsc->separate_stencil && buffer == STENCIL) {
99                 separate_stencil = rsc->separate_stencil;
100                 separate_stencil->writes++;
101         } else {
102                 rsc->writes++;
103         }
104 
105         cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
106                 store.buffer_to_store = buffer;
107                 if (separate_stencil) {
108                         store.address = cl_address(separate_stencil->bo,
109                                                    surf->separate_stencil_offset);
110                 } else {
111                         store.address = cl_address(rsc->bo, surf->offset);
112                 }
113 
114 #if V3D_VERSION >= 40
115                 store.clear_buffer_being_stored =
116                         ((job->cleared & pipe_bit) &&
117                          (general_color_clear ||
118                           !(pipe_bit & PIPE_CLEAR_COLOR_BUFFERS)));
119 
120                 if (separate_stencil) {
121                         store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
122                         store.memory_format = surf->separate_stencil_tiling;
123                 }  else {
124                         store.output_image_format = surf->format;
125                         store.memory_format = surf->tiling;
126                 }
127 
128                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
129                     surf->tiling == VC5_TILING_UIF_XOR) {
130                         if (separate_stencil) {
131                                 store.height_in_ub_or_stride =
132                                         surf->separate_stencil_padded_height_of_output_image_in_uif_blocks;
133                         } else {
134                                 store.height_in_ub_or_stride =
135                                         surf->padded_height_of_output_image_in_uif_blocks;
136                         }
137                 } else if (surf->tiling == VC5_TILING_RASTER) {
138                         struct vc5_resource_slice *slice =
139                                 &rsc->slices[psurf->u.tex.level];
140                         store.height_in_ub_or_stride = slice->stride;
141                 }
142 #else /* V3D_VERSION < 40 */
143                 store.raw_mode = true;
144                 if (!last_store) {
145                         store.disable_colour_buffers_clear_on_write = true;
146                         store.disable_z_buffer_clear_on_write = true;
147                         store.disable_stencil_buffer_clear_on_write = true;
148                 } else {
149                         store.disable_colour_buffers_clear_on_write =
150                                 !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) &&
151                                    general_color_clear &&
152                                    (job->cleared & pipe_bit)));
153                         store.disable_z_buffer_clear_on_write =
154                                 !(job->cleared & PIPE_CLEAR_DEPTH);
155                         store.disable_stencil_buffer_clear_on_write =
156                                 !(job->cleared & PIPE_CLEAR_STENCIL);
157                 }
158                 store.padded_height_of_output_image_in_uif_blocks =
159                         surf->padded_height_of_output_image_in_uif_blocks;
160 #endif /* V3D_VERSION < 40 */
161         }
162 }
163 
164 static int
zs_buffer_from_pipe_bits(int pipe_clear_bits)165 zs_buffer_from_pipe_bits(int pipe_clear_bits)
166 {
167         switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
168         case PIPE_CLEAR_DEPTHSTENCIL:
169                 return ZSTENCIL;
170         case PIPE_CLEAR_DEPTH:
171                 return Z;
172         case PIPE_CLEAR_STENCIL:
173                 return STENCIL;
174         default:
175                 return NONE;
176         }
177 }
178 
179 /* The HW queues up the load until the tile coordinates show up, but can only
180  * track one at a time.  If we need to do more than one load, then we need to
181  * flush out the previous load by emitting the tile coordinates and doing a
182  * dummy store.
183  */
184 static void
flush_last_load(struct vc5_cl * cl)185 flush_last_load(struct vc5_cl *cl)
186 {
187         if (V3D_VERSION >= 40)
188                 return;
189 
190         cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
191         cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
192                 store.buffer_to_store = NONE;
193         }
194 }
195 
196 static void
vc5_rcl_emit_loads(struct vc5_job * job,struct vc5_cl * cl)197 vc5_rcl_emit_loads(struct vc5_job *job, struct vc5_cl *cl)
198 {
199         uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
200 
201         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
202                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
203                 if (!(read_but_not_cleared & bit))
204                         continue;
205 
206                 struct pipe_surface *psurf = job->cbufs[i];
207                 if (!psurf || (V3D_VERSION < 40 &&
208                                psurf->texture->nr_samples <= 1)) {
209                         continue;
210                 }
211 
212                 load_general(cl, psurf, RENDER_TARGET_0 + i);
213                 read_but_not_cleared &= ~bit;
214 
215                 if (read_but_not_cleared)
216                         flush_last_load(cl);
217         }
218 
219         if (read_but_not_cleared & PIPE_CLEAR_DEPTHSTENCIL &&
220             (V3D_VERSION >= 40 ||
221              (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) {
222                 load_general(cl, job->zsbuf,
223                              zs_buffer_from_pipe_bits(read_but_not_cleared));
224                 read_but_not_cleared &= ~PIPE_CLEAR_DEPTHSTENCIL;
225                 if (read_but_not_cleared)
226                         cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
227         }
228 
229 #if V3D_VERSION < 40
230         /* The initial reload will be queued until we get the
231          * tile coordinates.
232          */
233         if (read_but_not_cleared) {
234                 cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) {
235                         load.disable_colour_buffer_load =
236                                 (~read_but_not_cleared &
237                                  PIPE_CLEAR_COLOR_BUFFERS) >>
238                                 PIPE_FIRST_COLOR_BUFFER_BIT;
239                         load.enable_z_load =
240                                 read_but_not_cleared & PIPE_CLEAR_DEPTH;
241                         load.enable_stencil_load =
242                                 read_but_not_cleared & PIPE_CLEAR_STENCIL;
243                 }
244         }
245 #else /* V3D_VERSION >= 40 */
246         assert(!read_but_not_cleared);
247         cl_emit(cl, END_OF_LOADS, end);
248 #endif
249 }
250 
251 static void
vc5_rcl_emit_stores(struct vc5_job * job,struct vc5_cl * cl)252 vc5_rcl_emit_stores(struct vc5_job *job, struct vc5_cl *cl)
253 {
254         MAYBE_UNUSED bool needs_color_clear = job->cleared & PIPE_CLEAR_COLOR_BUFFERS;
255         MAYBE_UNUSED bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH;
256         MAYBE_UNUSED bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL;
257 
258         /* For clearing color in a TLB general on V3D 3.3:
259          *
260          * - NONE buffer store clears all TLB color buffers.
261          * - color buffer store clears just the TLB color buffer being stored.
262          * - Z/S buffers store may not clear the TLB color buffer.
263          *
264          * And on V3D 4.1, we only have one flag for "clear the buffer being
265          * stored" in the general packet, and a separate packet to clear all
266          * color TLB buffers.
267          *
268          * As a result, we only bother flagging TLB color clears in a general
269          * packet when we don't have to emit a separate packet to clear all
270          * TLB color buffers.
271          */
272         bool general_color_clear = (needs_color_clear &&
273                                     (job->cleared & PIPE_CLEAR_COLOR_BUFFERS) ==
274                                     (job->resolve & PIPE_CLEAR_COLOR_BUFFERS));
275 
276         uint32_t stores_pending = job->resolve;
277 
278         /* For V3D 4.1, use general stores for all TLB stores.
279          *
280          * For V3D 3.3, we only use general stores to do raw stores for any
281          * MSAA surfaces.  These output UIF tiled images where each 4x MSAA
282          * pixel is a 2x2 quad, and the format will be that of the
283          * internal_type/internal_bpp, rather than the format from GL's
284          * perspective.  Non-MSAA surfaces will use
285          * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
286          */
287         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
288                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
289                 if (!(job->resolve & bit))
290                         continue;
291 
292                 struct pipe_surface *psurf = job->cbufs[i];
293                 if (!psurf ||
294                     (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
295                         continue;
296                 }
297 
298                 stores_pending &= ~bit;
299                 store_general(job, cl, psurf, RENDER_TARGET_0 + i, bit,
300                               !stores_pending, general_color_clear);
301                 if (V3D_VERSION < 40 && stores_pending)
302                         cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
303         }
304 
305         if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
306             !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
307                 stores_pending &= ~PIPE_CLEAR_DEPTHSTENCIL;
308 
309                 struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
310                 if (rsc->separate_stencil) {
311                         if (job->resolve & PIPE_CLEAR_DEPTH) {
312                                 store_general(job, cl, job->zsbuf, Z,
313                                               PIPE_CLEAR_DEPTH,
314                                               !stores_pending,
315                                               general_color_clear);
316                         }
317                         if (job->resolve & PIPE_CLEAR_STENCIL) {
318                                 store_general(job, cl, job->zsbuf, STENCIL,
319                                               PIPE_CLEAR_STENCIL,
320                                               !stores_pending,
321                                               general_color_clear);
322                         }
323                 } else {
324                         store_general(job, cl, job->zsbuf,
325                                       zs_buffer_from_pipe_bits(job->resolve),
326                                       job->resolve & PIPE_CLEAR_DEPTHSTENCIL,
327                                       !stores_pending, general_color_clear);
328                 }
329 
330                 if (V3D_VERSION < 40 && stores_pending)
331                         cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
332         }
333 
334         if (stores_pending) {
335 #if V3D_VERSION < 40
336                 cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
337 
338                         store.disable_color_buffer_write =
339                                 (~stores_pending >>
340                                  PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
341                         store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
342                         store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
343 
344                         /* Note that when set this will clear all of the color
345                          * buffers.
346                          */
347                         store.disable_colour_buffers_clear_on_write =
348                                 !needs_color_clear;
349                         store.disable_z_buffer_clear_on_write =
350                                 !needs_z_clear;
351                         store.disable_stencil_buffer_clear_on_write =
352                                 !needs_s_clear;
353                 };
354 #else /* V3D_VERSION >= 40 */
355                 unreachable("All color buffers should have been stored.");
356 #endif /* V3D_VERSION >= 40 */
357         } else if (needs_color_clear && !general_color_clear) {
358                 /* If we didn't do our color clears in the general packet,
359                  * then emit a packet to clear all the TLB color buffers now.
360                  */
361 #if V3D_VERSION < 40
362                 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
363                         store.buffer_to_store = NONE;
364                 }
365 #else /* V3D_VERSION >= 40 */
366                 cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
367                         clear.clear_all_render_targets = true;
368                 }
369 #endif /* V3D_VERSION >= 40 */
370         }
371 }
372 
373 static void
vc5_rcl_emit_generic_per_tile_list(struct vc5_job * job,int last_cbuf)374 vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf)
375 {
376         /* Emit the generic list in our indirect state -- the rcl will just
377          * have pointers into it.
378          */
379         struct vc5_cl *cl = &job->indirect;
380         vc5_cl_ensure_space(cl, 200, 1);
381         struct vc5_cl_reloc tile_list_start = cl_get_address(cl);
382 
383         if (V3D_VERSION >= 40) {
384                 /* V3D 4.x only requires a single tile coordinates, and
385                  * END_OF_LOADS switches us between loading and rendering.
386                  */
387                 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
388         }
389 
390         vc5_rcl_emit_loads(job, cl);
391 
392         if (V3D_VERSION < 40) {
393                 /* Tile Coordinates triggers the last reload and sets where
394                  * the stores go. There must be one per store packet.
395                  */
396                 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
397         }
398 
399         /* The binner starts out writing tiles assuming that the initial mode
400          * is triangles, so make sure that's the case.
401          */
402         cl_emit(cl, PRIMITIVE_LIST_FORMAT, fmt) {
403                 fmt.data_type = LIST_INDEXED;
404                 fmt.primitive_type = LIST_TRIANGLES;
405         }
406 
407         cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
408 
409         vc5_rcl_emit_stores(job, cl);
410 
411 #if V3D_VERSION >= 40
412         cl_emit(cl, END_OF_TILE_MARKER, end);
413 #endif
414 
415         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
416 
417         cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
418                 branch.start = tile_list_start;
419                 branch.end = cl_get_address(cl);
420         }
421 }
422 
#if V3D_VERSION >= 40
/* Fills in the internal bpp, internal type and clamp mode for color buffer
 * "cbuf" of the job.  The three outputs are left untouched when the job has
 * no surface bound at that index.
 */
static void
v3d_setup_render_target(struct vc5_job *job, int cbuf,
                        uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp)
{
        struct pipe_surface *psurf = job->cbufs[cbuf];

        if (psurf) {
                struct vc5_surface *surf = vc5_surface(psurf);

                *rt_bpp = surf->internal_bpp;
                *rt_type = surf->internal_type;
                *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
        }
}
#endif /* V3D_VERSION >= 40 */
437 
/* Integer division of a by b, rounded up.  Intended for non-negative a and
 * positive b.  Both arguments are fully parenthesised so that compound
 * expressions such as "x + 1" divide as a whole; note that b is evaluated
 * twice, so it must not have side effects.
 */
#define div_round_up(a, b) (((a) + (b) - 1) / (b))
439 
440 void
v3dX(emit_rcl)441 v3dX(emit_rcl)(struct vc5_job *job)
442 {
443         /* The RCL list should be empty. */
444         assert(!job->rcl.bo);
445 
446         vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 *
447                                         cl_packet_length(SUPERTILE_COORDINATES));
448         job->submit.rcl_start = job->rcl.bo->offset;
449         vc5_job_add_bo(job, job->rcl.bo);
450 
451         int nr_cbufs = 0;
452         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
453                 if (job->cbufs[i])
454                         nr_cbufs = i + 1;
455         }
456 
457         /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION
458          * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
459          * optional updates to the previous HW state.
460          */
461         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
462                 config) {
463 #if V3D_VERSION < 40
464                 config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
465                 config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
466 #else /* V3D_VERSION >= 40 */
467                 if (job->zsbuf) {
468                         struct vc5_surface *surf = vc5_surface(job->zsbuf);
469                         config.internal_depth_type = surf->internal_type;
470                 }
471 #endif /* V3D_VERSION >= 40 */
472 
473                 /* XXX: Early D/S clear */
474 
475                 config.early_z_disable = !job->uses_early_z;
476 
477                 config.image_width_pixels = job->draw_width;
478                 config.image_height_pixels = job->draw_height;
479 
480                 config.number_of_render_targets_minus_1 =
481                         MAX2(nr_cbufs, 1) - 1;
482 
483                 config.multisample_mode_4x = job->msaa;
484 
485                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
486         }
487 
488         for (int i = 0; i < nr_cbufs; i++) {
489                 struct pipe_surface *psurf = job->cbufs[i];
490                 if (!psurf)
491                         continue;
492                 struct vc5_surface *surf = vc5_surface(psurf);
493                 struct vc5_resource *rsc = vc5_resource(psurf->texture);
494 
495                 MAYBE_UNUSED uint32_t config_pad = 0;
496                 uint32_t clear_pad = 0;
497 
498                 /* XXX: Set the pad for raster. */
499                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
500                     surf->tiling == VC5_TILING_UIF_XOR) {
501                         int uif_block_height = vc5_utile_height(rsc->cpp) * 2;
502                         uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) /
503                                                            uif_block_height);
504                         if (surf->padded_height_of_output_image_in_uif_blocks -
505                             implicit_padded_height < 15) {
506                                 config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
507                                               implicit_padded_height);
508                         } else {
509                                 config_pad = 15;
510                                 clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
511                         }
512                 }
513 
514 #if V3D_VERSION < 40
515                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
516                         rt.address = cl_address(rsc->bo, surf->offset);
517                         rt.internal_type = surf->internal_type;
518                         rt.output_image_format = surf->format;
519                         rt.memory_format = surf->tiling;
520                         rt.internal_bpp = surf->internal_bpp;
521                         rt.render_target_number = i;
522                         rt.pad = config_pad;
523 
524                         if (job->resolve & PIPE_CLEAR_COLOR0 << i)
525                                 rsc->writes++;
526                 }
527 #endif /* V3D_VERSION < 40 */
528 
529                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
530                         clear) {
531                         clear.clear_color_low_32_bits = job->clear_color[i][0];
532                         clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
533                         clear.render_target_number = i;
534                 };
535 
536                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
537                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2,
538                                 clear) {
539                                 clear.clear_color_mid_low_32_bits =
540                                         ((job->clear_color[i][1] >> 24) |
541                                          (job->clear_color[i][2] << 8));
542                                 clear.clear_color_mid_high_24_bits =
543                                         ((job->clear_color[i][2] >> 24) |
544                                          ((job->clear_color[i][3] & 0xffff) << 8));
545                                 clear.render_target_number = i;
546                         };
547                 }
548 
549                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
550                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3,
551                                 clear) {
552                                 clear.uif_padded_height_in_uif_blocks = clear_pad;
553                                 clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
554                                 clear.render_target_number = i;
555                         };
556                 }
557         }
558 
559 #if V3D_VERSION >= 40
560         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
561                 v3d_setup_render_target(job, 0,
562                                         &rt.render_target_0_internal_bpp,
563                                         &rt.render_target_0_internal_type,
564                                         &rt.render_target_0_clamp);
565                 v3d_setup_render_target(job, 1,
566                                         &rt.render_target_1_internal_bpp,
567                                         &rt.render_target_1_internal_type,
568                                         &rt.render_target_1_clamp);
569                 v3d_setup_render_target(job, 2,
570                                         &rt.render_target_2_internal_bpp,
571                                         &rt.render_target_2_internal_type,
572                                         &rt.render_target_2_clamp);
573                 v3d_setup_render_target(job, 3,
574                                         &rt.render_target_3_internal_bpp,
575                                         &rt.render_target_3_internal_type,
576                                         &rt.render_target_3_clamp);
577         }
578 #endif
579 
580 #if V3D_VERSION < 40
581         /* TODO: Don't bother emitting if we don't load/clear Z/S. */
582         if (job->zsbuf) {
583                 struct pipe_surface *psurf = job->zsbuf;
584                 struct vc5_surface *surf = vc5_surface(psurf);
585                 struct vc5_resource *rsc = vc5_resource(psurf->texture);
586 
587                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
588                         zs.address = cl_address(rsc->bo, surf->offset);
589 
590                         zs.internal_type = surf->internal_type;
591                         zs.output_image_format = surf->format;
592                         zs.padded_height_of_output_image_in_uif_blocks =
593                                 surf->padded_height_of_output_image_in_uif_blocks;
594 
595                         assert(surf->tiling != VC5_TILING_RASTER);
596                         zs.memory_format = surf->tiling;
597                 }
598 
599                 if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL)
600                         rsc->writes++;
601 
602                 /* Emit the separate stencil packet if we have a resource for
603                  * it.  The HW will only load/store this buffer if the
604                  * Z/Stencil config doesn't have stencil in its format.
605                  */
606                 if (rsc->separate_stencil) {
607                         cl_emit(&job->rcl,
608                                 TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG,
609                                 zs) {
610                                 zs.address =
611                                         cl_address(rsc->separate_stencil->bo,
612                                                    surf->separate_stencil_offset);
613 
614                                 zs.z_stencil_id = 1; /* Separate stencil */
615 
616                                 zs.padded_height_of_output_image_in_uif_blocks =
617                                         surf->separate_stencil_padded_height_of_output_image_in_uif_blocks;
618 
619                                 assert(surf->tiling != VC5_TILING_RASTER);
620                                 zs.memory_format = surf->separate_stencil_tiling;
621                         }
622                 }
623         }
624 #endif /* V3D_VERSION < 40 */
625 
626         /* Ends rendering mode config. */
627         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
628                 clear) {
629                 clear.z_clear_value = job->clear_z;
630                 clear.stencil_vg_mask_clear_value = job->clear_s;
631         };
632 
633         /* Always set initial block size before the first branch, which needs
634          * to match the value from binning mode config.
635          */
636         cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
637                 init.use_auto_chained_tile_lists = true;
638                 init.size_of_first_block_in_chained_tile_lists =
639                         TILE_ALLOCATION_BLOCK_SIZE_64B;
640         }
641 
642         uint32_t supertile_w = 1, supertile_h = 1;
643 
644         /* If doing multicore binning, we would need to initialize each core's
645          * tile list here.
646          */
647         cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
648                 list.address = cl_address(job->tile_alloc, 0);
649         }
650 
651         cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) {
652                 uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
653                 const uint32_t max_supertiles = 256;
654 
655                 /* Size up our supertiles until we get under the limit. */
656                 for (;;) {
657                         frame_w_in_supertiles = div_round_up(job->draw_tiles_x,
658                                                              supertile_w);
659                         frame_h_in_supertiles = div_round_up(job->draw_tiles_y,
660                                                              supertile_h);
661                         if (frame_w_in_supertiles * frame_h_in_supertiles <
662                             max_supertiles) {
663                                 break;
664                         }
665 
666                         if (supertile_w < supertile_h)
667                                 supertile_w++;
668                         else
669                                 supertile_h++;
670                 }
671 
672                 config.total_frame_width_in_tiles = job->draw_tiles_x;
673                 config.total_frame_height_in_tiles = job->draw_tiles_y;
674 
675                 config.supertile_width_in_tiles_minus_1 = supertile_w - 1;
676                 config.supertile_height_in_tiles_minus_1 = supertile_h - 1;
677 
678                 config.total_frame_width_in_supertiles = frame_w_in_supertiles;
679                 config.total_frame_height_in_supertiles = frame_h_in_supertiles;
680         }
681 
682         /* Start by clearing the tile buffer. */
683         cl_emit(&job->rcl, TILE_COORDINATES, coords) {
684                 coords.tile_column_number = 0;
685                 coords.tile_row_number = 0;
686         }
687 
688 #if V3D_VERSION < 40
689         cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
690                 store.buffer_to_store = NONE;
691         }
692 #else
693         cl_emit(&job->rcl, END_OF_LOADS, end);
694         cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
695                 store.buffer_to_store = NONE;
696         }
697         cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
698                 clear.clear_z_stencil_buffer = true;
699                 clear.clear_all_render_targets = true;
700         }
701         cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
702 #endif
703 
704         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
705 
706         vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1);
707 
708         cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
709 
710         /* XXX: Use Morton order */
711         uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
712         uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
713         uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
714         uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
715         uint32_t max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
716         uint32_t max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
717 
718         for (int y = min_y_supertile; y <= max_y_supertile; y++) {
719                 for (int x = min_x_supertile; x <= max_x_supertile; x++) {
720                         cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
721                                 coords.column_number_in_supertiles = x;
722                                 coords.row_number_in_supertiles = y;
723                         }
724                 }
725         }
726 
727         cl_emit(&job->rcl, END_OF_RENDERING, end);
728 }
729