1 /*
2  * Copyright (C) 2008 VMware, Inc.
3  * Copyright (C) 2014 Broadcom
4  * Copyright (C) 2018-2019 Alyssa Rosenzweig
5  * Copyright (C) 2019-2020 Collabora, Ltd.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *
26  */
27 
28 #include "util/macros.h"
29 #include "util/u_math.h"
30 #include "pan_texture.h"
31 #include "panfrost-quirks.h"
32 
33 /* Generates a texture descriptor. Ideally, descriptors are immutable after the
34  * texture is created, so we can keep these hanging around in GPU memory in a
35  * dedicated BO and not have to worry. In practice there are some minor gotchas
36  * with this (the driver sometimes will change the format of a texture on the
37  * fly for compression) but it's fast enough to just regenerate the descriptor
38  * in those cases, rather than monkeypatching at drawtime. A texture descriptor
39  * consists of a 32-byte header followed by pointers.
40  */
41 
42 /* List of supported modifiers, in descending order of preference. AFBC is
43  * faster than u-interleaved tiling which is faster than linear. Within AFBC,
44  * enabling the YUV-like transform is typically a win where possible. */
45 
46 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
47         DRM_FORMAT_MOD_ARM_AFBC(
48                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
49                 AFBC_FORMAT_MOD_SPARSE |
50                 AFBC_FORMAT_MOD_YTR),
51 
52         DRM_FORMAT_MOD_ARM_AFBC(
53                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
54                 AFBC_FORMAT_MOD_SPARSE),
55 
56         DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
57         DRM_FORMAT_MOD_LINEAR
58 };
59 
60 /* Map modifiers to mali_texture_layout for packing in a texture descriptor */
61 
62 static enum mali_texture_layout
panfrost_modifier_to_layout(uint64_t modifier)63 panfrost_modifier_to_layout(uint64_t modifier)
64 {
65         if (drm_is_afbc(modifier))
66                 return MALI_TEXTURE_LAYOUT_AFBC;
67         else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
68                 return MALI_TEXTURE_LAYOUT_TILED;
69         else if (modifier == DRM_FORMAT_MOD_LINEAR)
70                 return MALI_TEXTURE_LAYOUT_LINEAR;
71         else
72                 unreachable("Invalid modifer");
73 }
74 
75 /* Check if we need to set a custom stride by computing the "expected"
76  * stride and comparing it to what the user actually wants. Only applies
77  * to linear textures, since tiled/compressed textures have strict
78  * alignment requirements for their strides as it is */
79 
80 static bool
panfrost_needs_explicit_stride(struct panfrost_slice * slices,uint16_t width,unsigned first_level,unsigned last_level,unsigned bytes_per_pixel)81 panfrost_needs_explicit_stride(
82                 struct panfrost_slice *slices,
83                 uint16_t width,
84                 unsigned first_level, unsigned last_level,
85                 unsigned bytes_per_pixel)
86 {
87         for (unsigned l = first_level; l <= last_level; ++l) {
88                 unsigned actual = slices[l].stride;
89                 unsigned expected = u_minify(width, l) * bytes_per_pixel;
90 
91                 if (actual != expected)
92                         return true;
93         }
94 
95         return false;
96 }
97 
/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
 * texture types in the hardware, but in fact can be parametrized to have
 * various widths and heights for the so-called "stretch factor". It turns out
 * these parameters are stuffed in the bottom bits of the payload pointers.
 * This function computes the magic stuffing constant for one block dimension
 * of the ASTC format in use. The constant in a given dimension is 3 bits, and
 * two are stored side-by-side for each active dimension.
 *
 * dim must lie in [4, 12]. The result is (dim - 4), except that 12 is clamped
 * to 11 first so the value still fits in 3 bits.
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        /* Clamp so that a dimension of 12 encodes as 7 rather than 8 */
        unsigned clamped = (dim > 11) ? 11 : dim;

        return clamped - 4;
}
113 
114 /* Texture addresses are tagged with information about compressed formats.
115  * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
116  * RGBA only).
117  * For ASTC, this is a "stretch factor" encoding the block size. */
118 
119 static unsigned
panfrost_compression_tag(const struct util_format_description * desc,uint64_t modifier)120 panfrost_compression_tag(
121                 const struct util_format_description *desc, uint64_t modifier)
122 {
123         if (drm_is_afbc(modifier))
124                 return (modifier & AFBC_FORMAT_MOD_YTR) ? 1 : 0;
125         else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC)
126                 return (panfrost_astc_stretch(desc->block.height) << 3) |
127                         panfrost_astc_stretch(desc->block.width);
128         else
129                 return 0;
130 }
131 
132 
/* Cubemaps have 6 faces as "layers" in between each actual layer, so the
 * incoming layer range is really a range of (layer, face) pairs. Split it up:
 * on return *first_face / *last_face hold the face index (0..5) of each end
 * of the range and *first_layer / *last_layer hold the cube index.
 *
 * TODO: the logic is wrong in the asserted out cases (a range spanning
 * several cubes that does not start on face 0 and end on face 5) ... can they
 * happen, perhaps from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        enum { FACES_PER_CUBE = 6 };

        /* Faces must be extracted before the layers are divided down */
        *first_face = *first_layer % FACES_PER_CUBE;
        *last_face = *last_layer % FACES_PER_CUBE;

        *first_layer = *first_layer / FACES_PER_CUBE;
        *last_layer = *last_layer / FACES_PER_CUBE;

        /* Either the range stays within one cube, or it covers whole cubes */
        assert((*first_layer == *last_layer) ||
               (*first_face == 0 && *last_face == FACES_PER_CUBE - 1));
}
149 
/* Following the texture descriptor is a number of 64-bit words. How many?
 * One per (level, layer, face, sample) combination in the requested ranges,
 * doubled when manual_stride is set because each surface is then followed by
 * a stride word. For cubes the layer range is first split into faces and cube
 * indices. An nr_samples of zero counts as one sample. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        if (is_cube)
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                                &first_layer, &last_layer);

        /* All ranges are inclusive, hence the +1 */
        unsigned level_count = last_level - first_level + 1;
        unsigned layer_count = last_layer - first_layer + 1;
        unsigned face_count = last_face - first_face + 1;
        unsigned sample_count = MAX2(nr_samples, 1);

        unsigned words_per_surface = manual_stride ? 2 : 1;

        return level_count * layer_count * face_count * sample_count *
                words_per_surface;
}
176 
177 /* Conservative estimate of the size of the texture payload a priori.
178  * Average case, size equal to the actual size. Worst case, off by 2x (if
179  * a manual stride is not needed on a linear texture). Returned value
180  * must be greater than or equal to the actual size, so it's safe to use
181  * as an allocation amount */
182 
183 unsigned
panfrost_estimate_texture_payload_size(unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,enum mali_texture_dimension dim,uint64_t modifier)184 panfrost_estimate_texture_payload_size(
185                 unsigned first_level, unsigned last_level,
186                 unsigned first_layer, unsigned last_layer,
187                 unsigned nr_samples,
188                 enum mali_texture_dimension dim, uint64_t modifier)
189 {
190         /* Assume worst case */
191         unsigned manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR);
192 
193         unsigned elements = panfrost_texture_num_elements(
194                         first_level, last_level,
195                         first_layer, last_layer,
196                         nr_samples,
197                         dim == MALI_TEXTURE_DIMENSION_CUBE, manual_stride);
198 
199         return sizeof(mali_ptr) * elements;
200 }
201 
202 /* If not explicitly, line stride is calculated for block-based formats as
203  * (ceil(width / block_width) * block_size). As a special case, this is left
204  * zero if there is only a single block vertically. So, we have a helper to
205  * extract the dimensions of a block-based format and use that to calculate the
206  * line stride as such.
207  */
208 
209 static unsigned
panfrost_block_dim(uint64_t modifier,bool width,unsigned plane)210 panfrost_block_dim(uint64_t modifier, bool width, unsigned plane)
211 {
212         if (!drm_is_afbc(modifier)) {
213                 assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
214                 return 16;
215         }
216 
217         switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
218         case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
219                 return 16;
220         case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
221                 return width ? 32 : 8;
222         case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4:
223                 return width ? 64 : 4;
224         case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4:
225                 return plane ? (width ? 64 : 4) : (width ? 32 : 8);
226         default:
227                 unreachable("Invalid AFBC block size");
228         }
229 }
230 
/* Line stride for a tiled or AFBC surface of the given size, in the form
 * (ceil(width / block_width) * block_size), where block_size is the number of
 * bytes covered by one modifier block. Returns zero when the surface is at
 * most one block tall. bytes_per_block and pixels_per_block describe the
 * format's own block (not the modifier's). */

static unsigned
panfrost_nonlinear_stride(uint64_t modifier,
                unsigned bytes_per_block,
                unsigned pixels_per_block,
                unsigned width,
                unsigned height,
                bool plane)
{
        const unsigned tile_w = panfrost_block_dim(modifier, true, plane);
        const unsigned tile_h = panfrost_block_dim(modifier, false, plane);

        /* Bytes per modifier block. Multiply everything out before dividing
         * to avoid rounding errors if bytes per block < pixels per block */
        const unsigned tile_bytes =
                (tile_w * tile_h * bytes_per_block) / pixels_per_block;

        /* Only a single block vertically: the stride is left at zero */
        if (height <= tile_h)
                return 0;

        unsigned tiles_per_row = DIV_ROUND_UP(width, tile_w);

        return tiles_per_row * tile_bytes;
}
253 
254 static void
panfrost_emit_texture_payload(mali_ptr * payload,const struct util_format_description * desc,enum mali_texture_dimension dim,uint64_t modifier,unsigned width,unsigned height,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,bool manual_stride,mali_ptr base,struct panfrost_slice * slices)255 panfrost_emit_texture_payload(
256         mali_ptr *payload,
257         const struct util_format_description *desc,
258         enum mali_texture_dimension dim,
259         uint64_t modifier,
260         unsigned width, unsigned height,
261         unsigned first_level, unsigned last_level,
262         unsigned first_layer, unsigned last_layer,
263         unsigned nr_samples,
264         unsigned cube_stride,
265         bool manual_stride,
266         mali_ptr base,
267         struct panfrost_slice *slices)
268 {
269         base |= panfrost_compression_tag(desc, modifier);
270 
271         /* Inject the addresses in, interleaving array indices, mip levels,
272          * cube faces, and strides in that order */
273 
274         unsigned first_face  = 0, last_face = 0, face_mult = 1;
275 
276         if (dim == MALI_TEXTURE_DIMENSION_CUBE) {
277                 face_mult = 6;
278                 panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
279         }
280 
281         nr_samples = MAX2(nr_samples, 1);
282 
283         unsigned idx = 0;
284 
285         for (unsigned w = first_layer; w <= last_layer; ++w) {
286                 for (unsigned l = first_level; l <= last_level; ++l) {
287                         for (unsigned f = first_face; f <= last_face; ++f) {
288                                 for (unsigned s = 0; s < nr_samples; ++s) {
289                                         payload[idx++] = base + panfrost_texture_offset(
290                                                         slices, dim == MALI_TEXTURE_DIMENSION_3D,
291                                                         cube_stride, l, w * face_mult + f, s);
292 
293                                         if (manual_stride) {
294                                                 payload[idx++] = (modifier == DRM_FORMAT_MOD_LINEAR) ?
295                                                         slices[l].stride :
296                                                         panfrost_nonlinear_stride(modifier,
297                                                                         MAX2(desc->block.bits / 8, 1),
298                                                                         desc->block.width * desc->block.height,
299                                                                         u_minify(width, l),
300                                                                         u_minify(height, l), false);
301                                         }
302                                 }
303                         }
304                 }
305         }
306 }
307 
308 static void
panfrost_emit_texture_payload_v7(mali_ptr * payload,const struct util_format_description * desc,enum mali_texture_dimension dim,uint64_t modifier,unsigned width,unsigned height,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,mali_ptr base,struct panfrost_slice * slices)309 panfrost_emit_texture_payload_v7(mali_ptr *payload,
310                                  const struct util_format_description *desc,
311                                  enum mali_texture_dimension dim,
312                                  uint64_t modifier,
313                                  unsigned width, unsigned height,
314                                  unsigned first_level, unsigned last_level,
315                                  unsigned first_layer, unsigned last_layer,
316                                  unsigned nr_samples,
317                                  unsigned cube_stride,
318                                  mali_ptr base,
319                                  struct panfrost_slice *slices)
320 {
321         base |= panfrost_compression_tag(desc, modifier);
322 
323         /* Inject the addresses in, interleaving array indices, mip levels,
324          * cube faces, and strides in that order */
325 
326         unsigned first_face  = 0, last_face = 0, face_mult = 1;
327 
328         if (dim == MALI_TEXTURE_DIMENSION_CUBE) {
329                 face_mult = 6;
330                 panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
331         }
332 
333         nr_samples = MAX2(nr_samples, 1);
334 
335         unsigned idx = 0;
336         bool is_3d = dim == MALI_TEXTURE_DIMENSION_3D;
337         bool is_linear = modifier == DRM_FORMAT_MOD_LINEAR;
338 
339         assert(nr_samples == 1 || face_mult == 1);
340 
341         for (unsigned w = first_layer; w <= last_layer; ++w) {
342                 for (unsigned f = first_face; f <= last_face; ++f) {
343                         for (unsigned s = 0; s < nr_samples; ++s) {
344                                 for (unsigned l = first_level; l <= last_level; ++l) {
345                                         payload[idx++] =
346                                                 base +
347                                                 panfrost_texture_offset(slices, is_3d,
348                                                                         cube_stride, l,
349                                                                         w * face_mult + f, s);
350 
351                                         unsigned line_stride =
352 						is_linear ?
353                                                 slices[l].stride :
354                                                 panfrost_nonlinear_stride(modifier,
355                                                                           MAX2(desc->block.bits / 8, 1),
356                                                                           desc->block.width * desc->block.height,
357                                                                           u_minify(width, l),
358                                                                           u_minify(height, l), false);
359                                         unsigned layer_stride = 0; /* FIXME */
360                                         payload[idx++] = ((uint64_t)layer_stride << 32) | line_stride;
361                                 }
362                         }
363                 }
364         }
365 }
366 
367 void
panfrost_new_texture(void * out,uint16_t width,uint16_t height,uint16_t depth,uint16_t array_size,enum pipe_format format,enum mali_texture_dimension dim,uint64_t modifier,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,unsigned swizzle,mali_ptr base,struct panfrost_slice * slices)368 panfrost_new_texture(
369         void *out,
370         uint16_t width, uint16_t height,
371         uint16_t depth, uint16_t array_size,
372         enum pipe_format format,
373         enum mali_texture_dimension dim,
374         uint64_t modifier,
375         unsigned first_level, unsigned last_level,
376         unsigned first_layer, unsigned last_layer,
377         unsigned nr_samples,
378         unsigned cube_stride,
379         unsigned swizzle,
380         mali_ptr base,
381         struct panfrost_slice *slices)
382 {
383         const struct util_format_description *desc =
384                 util_format_description(format);
385 
386         unsigned bytes_per_pixel = util_format_get_blocksize(format);
387 
388         bool manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR)
389                 && panfrost_needs_explicit_stride(slices, width,
390                                 first_level, last_level, bytes_per_pixel);
391 
392         pan_pack(out, MIDGARD_TEXTURE, cfg) {
393                 cfg.width = u_minify(width, first_level);
394                 cfg.height = u_minify(height, first_level);
395                 cfg.depth = u_minify(depth, first_level);
396                 cfg.array_size = array_size;
397                 cfg.format = panfrost_pipe_format_v6[format].hw;
398                 cfg.dimension = dim;
399                 cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
400                 cfg.manual_stride = manual_stride;
401                 cfg.levels = last_level - first_level;
402                 cfg.swizzle = swizzle;
403         };
404 
405         panfrost_emit_texture_payload(
406                 (mali_ptr *) (out + MALI_MIDGARD_TEXTURE_LENGTH),
407                 desc,
408                 dim,
409                 modifier,
410                 width, height,
411                 first_level, last_level,
412                 first_layer, last_layer,
413                 nr_samples,
414                 cube_stride,
415                 manual_stride,
416                 base,
417                 slices);
418 }
419 
420 void
panfrost_new_texture_bifrost(const struct panfrost_device * dev,struct mali_bifrost_texture_packed * out,uint16_t width,uint16_t height,uint16_t depth,uint16_t array_size,enum pipe_format format,enum mali_texture_dimension dim,uint64_t modifier,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,unsigned swizzle,mali_ptr base,struct panfrost_slice * slices,const struct panfrost_ptr * payload)421 panfrost_new_texture_bifrost(
422         const struct panfrost_device *dev,
423         struct mali_bifrost_texture_packed *out,
424         uint16_t width, uint16_t height,
425         uint16_t depth, uint16_t array_size,
426         enum pipe_format format,
427         enum mali_texture_dimension dim,
428         uint64_t modifier,
429         unsigned first_level, unsigned last_level,
430         unsigned first_layer, unsigned last_layer,
431         unsigned nr_samples,
432         unsigned cube_stride,
433         unsigned swizzle,
434         mali_ptr base,
435         struct panfrost_slice *slices,
436         const struct panfrost_ptr *payload)
437 {
438         const struct util_format_description *desc =
439                 util_format_description(format);
440 
441         if (dev->arch >= 7) {
442                 panfrost_emit_texture_payload_v7(payload->cpu,
443                                                  desc,
444                                                  dim,
445                                                  modifier,
446                                                  width, height,
447                                                  first_level, last_level,
448                                                  first_layer, last_layer,
449                                                  nr_samples,
450                                                  cube_stride,
451                                                  base,
452                                                  slices);
453         } else {
454                 panfrost_emit_texture_payload(payload->cpu,
455                                               desc,
456                                               dim,
457                                               modifier,
458                                               width, height,
459                                               first_level, last_level,
460                                               first_layer, last_layer,
461                                               nr_samples,
462                                               cube_stride,
463                                               true, /* Stride explicit on Bifrost */
464                                               base,
465                                               slices);
466         }
467 
468         pan_pack(out, BIFROST_TEXTURE, cfg) {
469                 cfg.dimension = dim;
470                 cfg.format = dev->formats[format].hw;
471 
472                 cfg.width = u_minify(width, first_level);
473                 cfg.height = u_minify(height, first_level);
474                 cfg.swizzle = swizzle;
475                 cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
476                 cfg.levels = last_level - first_level;
477                 cfg.array_size = array_size;
478                 cfg.surfaces = payload->gpu;
479 
480                 /* We specify API-level LOD clamps in the sampler descriptor
481                  * and use these clamps simply for bounds checking */
482                 cfg.minimum_lod = FIXED_16(0, false);
483                 cfg.maximum_lod = FIXED_16(cfg.levels, false);
484         }
485 }
486 
487 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
488  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
489  * This feature is also known as "transaction elimination". */
490 
491 #define CHECKSUM_TILE_WIDTH 16
492 #define CHECKSUM_TILE_HEIGHT 16
493 #define CHECKSUM_BYTES_PER_TILE 8
494 
495 unsigned
panfrost_compute_checksum_size(struct panfrost_slice * slice,unsigned width,unsigned height)496 panfrost_compute_checksum_size(
497         struct panfrost_slice *slice,
498         unsigned width,
499         unsigned height)
500 {
501         unsigned aligned_width = ALIGN_POT(width, CHECKSUM_TILE_WIDTH);
502         unsigned aligned_height = ALIGN_POT(height, CHECKSUM_TILE_HEIGHT);
503 
504         unsigned tile_count_x = aligned_width / CHECKSUM_TILE_WIDTH;
505         unsigned tile_count_y = aligned_height / CHECKSUM_TILE_HEIGHT;
506 
507         slice->checksum_stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
508 
509         return slice->checksum_stride * tile_count_y;
510 }
511 
512 unsigned
panfrost_get_layer_stride(struct panfrost_slice * slices,bool is_3d,unsigned cube_stride,unsigned level)513 panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
514 {
515         return is_3d ? slices[level].size0 : cube_stride;
516 }
517 
518 /* Computes the offset into a texture at a particular level/face. Add to
519  * the base address of a texture to get the address to that level/face */
520 
521 unsigned
panfrost_texture_offset(struct panfrost_slice * slices,bool is_3d,unsigned cube_stride,unsigned level,unsigned face,unsigned sample)522 panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face, unsigned sample)
523 {
524         unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
525         return slices[level].offset + (face * layer_stride) + (sample * slices[level].size0);
526 }
527