1 /*
2  * Copyright © 2015-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/format/u_format.h"
25 #include "util/u_surface.h"
26 #include "util/u_blitter.h"
27 #include "v3d_context.h"
28 #include "v3d_tiling.h"
29 
30 #if 0
31 static struct pipe_surface *
32 v3d_get_blit_surface(struct pipe_context *pctx,
33                      struct pipe_resource *prsc, unsigned level)
34 {
35         struct pipe_surface tmpl;
36 
37         memset(&tmpl, 0, sizeof(tmpl));
38         tmpl.format = prsc->format;
39         tmpl.u.tex.level = level;
40         tmpl.u.tex.first_layer = 0;
41         tmpl.u.tex.last_layer = 0;
42 
43         return pctx->create_surface(pctx, prsc, &tmpl);
44 }
45 
/* Returns true when `size` has any of the bits below `tile_size` set, i.e.
 * when it is not a multiple of `tile_size`.  The mask test is only a valid
 * alignment check for power-of-two tile sizes.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        unsigned low_bits_mask = tile_size - 1;

        return (size & low_bits_mask) != 0;
}
51 
/* Tries to perform a color blit as a tile load/store job: the source is
 * attached to the job as color_read and the job resolves color to the
 * destination surface.
 *
 * Returns false, before creating any surface or job, when the blit does not
 * meet the restrictions checked below; returns true after the job has been
 * submitted.
 *
 * NOTE(review): this function sits inside the "#if 0" region and calls
 * v3d_flush_jobs_reading_resource() with three arguments, while the live
 * code in this file passes four to the related writing variant -- it likely
 * needs updating before it can be re-enabled.
 */
static bool
v3d_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        bool msaa = (info->src.resource->nr_samples > 1 ||
                     info->dst.resource->nr_samples > 1);
        /* Tile dimensions used for the alignment checks and for the job. */
        int tile_width = msaa ? 32 : 64;
        int tile_height = msaa ? 32 : 64;

        /* Depth/stencil destinations are not handled by this path. */
        if (util_format_is_depth_or_stencil(info->dst.resource->format))
                return false;

        /* Scissored blits are not handled by this path. */
        if (info->scissor_enable)
                return false;

        /* Nothing to do here unless at least one color channel is written. */
        if ((info->mask & PIPE_MASK_RGBA) == 0)
                return false;

        /* Source and destination boxes must match exactly in x/y/width/height
         * (no offset between them and no scaling).
         */
        if (info->dst.box.x != info->src.box.x ||
            info->dst.box.y != info->src.box.y ||
            info->dst.box.width != info->src.box.width ||
            info->dst.box.height != info->src.box.height) {
                return false;
        }

        /* The box must start on a tile boundary, and each extent must either
         * be a whole number of tiles or end exactly at the edge of the
         * destination miplevel.
         */
        int dst_surface_width = u_minify(info->dst.resource->width0,
                                         info->dst.level);
        int dst_surface_height = u_minify(info->dst.resource->height0,
                                         info->dst.level);
        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
            is_tile_unaligned(info->dst.box.y, tile_height) ||
            (is_tile_unaligned(info->dst.box.width, tile_width) &&
             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
            (is_tile_unaligned(info->dst.box.height, tile_height) &&
             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
                return false;
        }

        /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
         * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
         * destination surface) to determine the stride.  This may be wrong
         * when reading from texture miplevels > 0, which are stored in
         * POT-sized areas.  For MSAA, the tile addresses are computed
         * explicitly by the RCL, but still use the destination width to
         * determine the stride (which could be fixed by explicitly supplying
         * it in the ABI).
         */
        struct v3d_resource *rsc = v3d_resource(info->src.resource);

        /* Stride derived from the destination width; compared below against
         * the stride actually stored for the source slice.
         */
        uint32_t stride;

        if (info->src.resource->nr_samples > 1)
                stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
        /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T)
           stride = align(dst_surface_width * rsc->cpp, 128); */
        else
                stride = align(dst_surface_width * rsc->cpp, 16);

        if (stride != rsc->slices[info->src.level].stride)
                return false;

        /* No format conversion on this path. */
        if (info->dst.resource->format != info->src.resource->format)
                return false;

        /* Debug trace, disabled by the constant condition. */
        if (false) {
                fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
                        info->src.box.x,
                        info->src.box.y,
                        info->dst.box.x,
                        info->dst.box.y,
                        info->dst.box.width,
                        info->dst.box.height);
        }

        struct pipe_surface *dst_surf =
                v3d_get_blit_surface(pctx, info->dst.resource, info->dst.level);
        struct pipe_surface *src_surf =
                v3d_get_blit_surface(pctx, info->src.resource, info->src.level);

        v3d_flush_jobs_reading_resource(v3d, info->src.resource, false);

        /* The job targets the destination; the source is attached as the
         * color buffer to be read.
         */
        struct v3d_job *job = v3d_get_job(v3d, dst_surf, NULL);
        pipe_surface_reference(&job->color_read, src_surf);

        /* If we're resolving from MSAA to single sample, we still need to run
         * the engine in MSAA mode for the load.
         */
        if (!job->msaa && info->src.resource->nr_samples > 1) {
                job->msaa = true;
                job->tile_width = 32;
                job->tile_height = 32;
        }

        job->draw_min_x = info->dst.box.x;
        job->draw_min_y = info->dst.box.y;
        job->draw_max_x = info->dst.box.x + info->dst.box.width;
        job->draw_max_y = info->dst.box.y + info->dst.box.height;
        job->draw_width = dst_surf->width;
        job->draw_height = dst_surf->height;

        /* NOTE(review): these unconditional assignments overwrite whatever
         * the MSAA block above stored in tile_width/tile_height/msaa.
         */
        job->tile_width = tile_width;
        job->tile_height = tile_height;
        job->msaa = msaa;
        job->needs_flush = true;
        job->resolve |= PIPE_CLEAR_COLOR;

        v3d_job_submit(v3d, job);

        /* Drop the local references taken by v3d_get_blit_surface(). */
        pipe_surface_reference(&dst_surf, NULL);
        pipe_surface_reference(&src_surf, NULL);

        return true;
}
165 #endif
166 
167 void
v3d_blitter_save(struct v3d_context * v3d)168 v3d_blitter_save(struct v3d_context *v3d)
169 {
170         util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
171                                                         v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
172         util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb);
173         util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
174         util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
175         util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
176         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
177                                      v3d->streamout.targets);
178         util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
179         util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
180         util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
181         util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
182         util_blitter_save_blend(v3d->blitter, v3d->blend);
183         util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
184         util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
185         util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask);
186         util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
187         util_blitter_save_fragment_sampler_states(v3d->blitter,
188                         v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
189                         (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
190         util_blitter_save_fragment_sampler_views(v3d->blitter,
191                         v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
192                         v3d->tex[PIPE_SHADER_FRAGMENT].textures);
193         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
194                                      v3d->streamout.targets);
195 }
196 
197 static bool
v3d_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)198 v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
199 {
200         struct v3d_context *v3d = v3d_context(ctx);
201         struct v3d_resource *src = v3d_resource(info->src.resource);
202         struct pipe_resource *tiled = NULL;
203 
204         if (!src->tiled) {
205                 struct pipe_box box = {
206                         .x = 0,
207                         .y = 0,
208                         .width = u_minify(info->src.resource->width0,
209                                            info->src.level),
210                         .height = u_minify(info->src.resource->height0,
211                                            info->src.level),
212                         .depth = 1,
213                 };
214                 struct pipe_resource tmpl = {
215                         .target = info->src.resource->target,
216                         .format = info->src.resource->format,
217                         .width0 = box.width,
218                         .height0 = box.height,
219                         .depth0 = 1,
220                         .array_size = 1,
221                 };
222                 tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
223                 if (!tiled) {
224                         fprintf(stderr, "Failed to create tiled blit temp\n");
225                         return false;
226                 }
227                 ctx->resource_copy_region(ctx,
228                                           tiled, 0,
229                                           0, 0, 0,
230                                           info->src.resource, info->src.level,
231                                           &box);
232                 info->src.level = 0;
233                 info->src.resource = tiled;
234         }
235 
236         if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
237                 fprintf(stderr, "blit unsupported %s -> %s\n",
238                     util_format_short_name(info->src.resource->format),
239                     util_format_short_name(info->dst.resource->format));
240                 return false;
241         }
242 
243         v3d_blitter_save(v3d);
244         util_blitter_blit(v3d->blitter, info);
245 
246         pipe_resource_reference(&tiled, NULL);
247 
248         return true;
249 }
250 
251 /* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
252  * or R8 texture.
253  */
254 static void
v3d_stencil_blit(struct pipe_context * ctx,const struct pipe_blit_info * info)255 v3d_stencil_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
256 {
257         struct v3d_context *v3d = v3d_context(ctx);
258         struct v3d_resource *src = v3d_resource(info->src.resource);
259         struct v3d_resource *dst = v3d_resource(info->dst.resource);
260         enum pipe_format src_format, dst_format;
261 
262         if (src->separate_stencil) {
263                 src = src->separate_stencil;
264                 src_format = PIPE_FORMAT_R8_UNORM;
265         } else {
266                 src_format = PIPE_FORMAT_RGBA8888_UNORM;
267         }
268 
269         if (dst->separate_stencil) {
270                 dst = dst->separate_stencil;
271                 dst_format = PIPE_FORMAT_R8_UNORM;
272         } else {
273                 dst_format = PIPE_FORMAT_RGBA8888_UNORM;
274         }
275 
276         /* Initialize the surface. */
277         struct pipe_surface dst_tmpl = {
278                 .u.tex = {
279                         .level = info->dst.level,
280                         .first_layer = info->dst.box.z,
281                         .last_layer = info->dst.box.z,
282                 },
283                 .format = dst_format,
284         };
285         struct pipe_surface *dst_surf =
286                 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
287 
288         /* Initialize the sampler view. */
289         struct pipe_sampler_view src_tmpl = {
290                 .target = src->base.target,
291                 .format = src_format,
292                 .u.tex = {
293                         .first_level = info->src.level,
294                         .last_level = info->src.level,
295                         .first_layer = 0,
296                         .last_layer = (PIPE_TEXTURE_3D ?
297                                        u_minify(src->base.depth0,
298                                                 info->src.level) - 1 :
299                                        src->base.array_size - 1),
300                 },
301                 .swizzle_r = PIPE_SWIZZLE_X,
302                 .swizzle_g = PIPE_SWIZZLE_Y,
303                 .swizzle_b = PIPE_SWIZZLE_Z,
304                 .swizzle_a = PIPE_SWIZZLE_W,
305         };
306         struct pipe_sampler_view *src_view =
307                 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
308 
309         v3d_blitter_save(v3d);
310         util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
311                                   src_view, &info->src.box,
312                                   src->base.width0, src->base.height0,
313                                   PIPE_MASK_R,
314                                   PIPE_TEX_FILTER_NEAREST,
315                                   info->scissor_enable ? &info->scissor : NULL,
316                                   info->alpha_blend);
317 
318         pipe_surface_reference(&dst_surf, NULL);
319         pipe_sampler_view_reference(&src_view, NULL);
320 }
321 
/* Bit-field definitions for the "ioa" and "icfg" words of the TFU job that
 * v3d_tfu() fills in.
 */

/* Disable level 0 write, just write following mipmaps */
#define V3D_TFU_IOA_DIMTW (1 << 0)
/* Shift of the destination tiling format field within ioa. */
#define V3D_TFU_IOA_FORMAT_SHIFT 3
/* Destination tiling format values.  v3d_tfu() computes the value as
 * LINEARTILE + (tiling - VC5_TILING_LINEARTILE), so these must stay in the
 * same order as the VC5_TILING_* enum from LINEARTILE onwards.
 */
#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
#define V3D_TFU_IOA_FORMAT_UIF_XOR 7

/* Shift of the icfg field that receives (last_level - base_level). */
#define V3D_TFU_ICFG_NUMMM_SHIFT 5
/* Shift of the icfg field that receives the texture format returned by
 * v3d_get_tex_format().
 */
#define V3D_TFU_ICFG_TTYPE_SHIFT 9

/* Shift of the icfg field that receives the number of extra UIF blocks of
 * padding in the destination (see the OPAD computation in v3d_tfu()).
 */
#define V3D_TFU_ICFG_OPAD_SHIFT 22

/* Shift and values of the source tiling format field within icfg.  As with
 * the ioa values, the tiled entries are derived arithmetically from
 * VC5_TILING_* in v3d_tfu() and must keep that enum's ordering.
 */
#define V3D_TFU_ICFG_FORMAT_SHIFT 18
#define V3D_TFU_ICFG_FORMAT_RASTER 0
#define V3D_TFU_ICFG_FORMAT_SAND_128 1
#define V3D_TFU_ICFG_FORMAT_SAND_256 2
#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
345 
346 static bool
v3d_tfu(struct pipe_context * pctx,struct pipe_resource * pdst,struct pipe_resource * psrc,unsigned int src_level,unsigned int base_level,unsigned int last_level,unsigned int src_layer,unsigned int dst_layer)347 v3d_tfu(struct pipe_context *pctx,
348         struct pipe_resource *pdst,
349         struct pipe_resource *psrc,
350         unsigned int src_level,
351         unsigned int base_level,
352         unsigned int last_level,
353         unsigned int src_layer,
354         unsigned int dst_layer)
355 {
356         struct v3d_context *v3d = v3d_context(pctx);
357         struct v3d_screen *screen = v3d->screen;
358         struct v3d_resource *src = v3d_resource(psrc);
359         struct v3d_resource *dst = v3d_resource(pdst);
360         struct v3d_resource_slice *src_base_slice = &src->slices[src_level];
361         struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level];
362         int msaa_scale = pdst->nr_samples > 1 ? 2 : 1;
363         int width = u_minify(pdst->width0, base_level) * msaa_scale;
364         int height = u_minify(pdst->height0, base_level) * msaa_scale;
365 
366         if (psrc->format != pdst->format)
367                 return false;
368         if (psrc->nr_samples != pdst->nr_samples)
369                 return false;
370 
371         uint32_t tex_format = v3d_get_tex_format(&screen->devinfo,
372                                                  pdst->format);
373 
374         if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format))
375                 return false;
376 
377         if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D)
378                 return false;
379 
380         /* Can't write to raster. */
381         if (dst_base_slice->tiling == VC5_TILING_RASTER)
382                 return false;
383 
384         v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
385         v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);
386 
387         struct drm_v3d_submit_tfu tfu = {
388                 .ios = (height << 16) | width,
389                 .bo_handles = {
390                         dst->bo->handle,
391                         src != dst ? src->bo->handle : 0
392                 },
393                 .in_sync = v3d->out_sync,
394                 .out_sync = v3d->out_sync,
395         };
396         uint32_t src_offset = (src->bo->offset +
397                                v3d_layer_offset(psrc, src_level, src_layer));
398         tfu.iia |= src_offset;
399         if (src_base_slice->tiling == VC5_TILING_RASTER) {
400                 tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER <<
401                              V3D_TFU_ICFG_FORMAT_SHIFT);
402         } else {
403                 tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE +
404                               (src_base_slice->tiling - VC5_TILING_LINEARTILE)) <<
405                              V3D_TFU_ICFG_FORMAT_SHIFT);
406         }
407 
408         uint32_t dst_offset = (dst->bo->offset +
409                                v3d_layer_offset(pdst, base_level, dst_layer));
410         tfu.ioa |= dst_offset;
411         if (last_level != base_level)
412                 tfu.ioa |= V3D_TFU_IOA_DIMTW;
413         tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE +
414                      (dst_base_slice->tiling - VC5_TILING_LINEARTILE)) <<
415                     V3D_TFU_IOA_FORMAT_SHIFT);
416 
417         tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT;
418         tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT;
419 
420         switch (src_base_slice->tiling) {
421         case VC5_TILING_UIF_NO_XOR:
422         case VC5_TILING_UIF_XOR:
423                 tfu.iis |= (src_base_slice->padded_height /
424                             (2 * v3d_utile_height(src->cpp)));
425                 break;
426         case VC5_TILING_RASTER:
427                 tfu.iis |= src_base_slice->stride / src->cpp;
428                 break;
429         case VC5_TILING_LINEARTILE:
430         case VC5_TILING_UBLINEAR_1_COLUMN:
431         case VC5_TILING_UBLINEAR_2_COLUMN:
432                 break;
433        }
434 
435         /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
436          * OPAD field for the destination (how many extra UIF blocks beyond
437          * those necessary to cover the height).  When filling mipmaps, the
438          * miplevel 1+ tiling state is inferred.
439          */
440         if (dst_base_slice->tiling == VC5_TILING_UIF_NO_XOR ||
441             dst_base_slice->tiling == VC5_TILING_UIF_XOR) {
442                 int uif_block_h = 2 * v3d_utile_height(dst->cpp);
443                 int implicit_padded_height = align(height, uif_block_h);
444 
445                 tfu.icfg |= (((dst_base_slice->padded_height -
446                                implicit_padded_height) / uif_block_h) <<
447                              V3D_TFU_ICFG_OPAD_SHIFT);
448         }
449 
450         int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
451         if (ret != 0) {
452                 fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
453                 return false;
454         }
455 
456         dst->writes++;
457 
458         return true;
459 }
460 
461 bool
v3d_generate_mipmap(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned int base_level,unsigned int last_level,unsigned int first_layer,unsigned int last_layer)462 v3d_generate_mipmap(struct pipe_context *pctx,
463                     struct pipe_resource *prsc,
464                     enum pipe_format format,
465                     unsigned int base_level,
466                     unsigned int last_level,
467                     unsigned int first_layer,
468                     unsigned int last_layer)
469 {
470         if (format != prsc->format)
471                 return false;
472 
473         /* We could maybe support looping over layers for array textures, but
474          * we definitely don't support 3D.
475          */
476         if (first_layer != last_layer)
477                 return false;
478 
479         return v3d_tfu(pctx,
480                        prsc, prsc,
481                        base_level,
482                        base_level, last_level,
483                        first_layer, first_layer);
484 }
485 
486 static bool
v3d_tfu_blit(struct pipe_context * pctx,const struct pipe_blit_info * info)487 v3d_tfu_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
488 {
489         int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
490         int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
491 
492         if ((info->mask & PIPE_MASK_RGBA) == 0)
493                 return false;
494 
495         if (info->scissor_enable ||
496             info->dst.box.x != 0 ||
497             info->dst.box.y != 0 ||
498             info->dst.box.width != dst_width ||
499             info->dst.box.height != dst_height ||
500             info->src.box.x != 0 ||
501             info->src.box.y != 0 ||
502             info->src.box.width != info->dst.box.width ||
503             info->src.box.height != info->dst.box.height) {
504                 return false;
505         }
506 
507         if (info->dst.format != info->src.format)
508                 return false;
509 
510         return v3d_tfu(pctx, info->dst.resource, info->src.resource,
511                        info->src.level,
512                        info->dst.level, info->dst.level,
513                        info->src.box.z, info->dst.box.z);
514 }
515 
516 /* Optimal hardware path for blitting pixels.
517  * Scaling, format conversion, up- and downsampling (resolve) are allowed.
518  */
519 void
v3d_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)520 v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
521 {
522         struct v3d_context *v3d = v3d_context(pctx);
523         struct pipe_blit_info info = *blit_info;
524 
525         if (info.mask & PIPE_MASK_S) {
526                 v3d_stencil_blit(pctx, blit_info);
527                 info.mask &= ~PIPE_MASK_S;
528         }
529 
530         if (v3d_tfu_blit(pctx, blit_info))
531                 info.mask &= ~PIPE_MASK_RGBA;
532 
533         if (info.mask)
534                 v3d_render_blit(pctx, &info);
535 
536         /* Flush our blit jobs immediately.  They're unlikely to get reused by
537          * normal drawing or other blits, and without flushing we can easily
538          * run into unexpected OOMs when blits are used for a large series of
539          * texture uploads before using the textures.
540          */
541         v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
542                                         V3D_FLUSH_DEFAULT, false);
543 }
544