1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "gfx9d.h"
35 #include "util/debug.h"
36 #include "util/u_atomic.h"
37 static unsigned
radv_choose_tiling(struct radv_device * device,const struct radv_image_create_info * create_info)38 radv_choose_tiling(struct radv_device *device,
39 const struct radv_image_create_info *create_info)
40 {
41 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
42
43 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
44 assert(pCreateInfo->samples <= 1);
45 return RADEON_SURF_MODE_LINEAR_ALIGNED;
46 }
47
48 if (!vk_format_is_compressed(pCreateInfo->format) &&
49 !vk_format_is_depth_or_stencil(pCreateInfo->format)
50 && device->physical_device->rad_info.chip_class <= VI) {
51 /* this causes hangs in some VK CTS tests on GFX9. */
52 /* Textures with a very small height are recommended to be linear. */
53 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
54 /* Only very thin and long 2D textures should benefit from
55 * linear_aligned. */
56 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
57 return RADEON_SURF_MODE_LINEAR_ALIGNED;
58 }
59
60 /* MSAA resources must be 2D tiled. */
61 if (pCreateInfo->samples > 1)
62 return RADEON_SURF_MODE_2D;
63
64 return RADEON_SURF_MODE_2D;
65 }
66 static int
radv_init_surface(struct radv_device * device,struct radeon_surf * surface,const struct radv_image_create_info * create_info)67 radv_init_surface(struct radv_device *device,
68 struct radeon_surf *surface,
69 const struct radv_image_create_info *create_info)
70 {
71 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
72 unsigned array_mode = radv_choose_tiling(device, create_info);
73 const struct vk_format_description *desc =
74 vk_format_description(pCreateInfo->format);
75 bool is_depth, is_stencil, blendable;
76
77 is_depth = vk_format_has_depth(desc);
78 is_stencil = vk_format_has_stencil(desc);
79
80 surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format);
81 surface->blk_h = vk_format_get_blockheight(pCreateInfo->format);
82
83 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format));
84 /* align byte per element on dword */
85 if (surface->bpe == 3) {
86 surface->bpe = 4;
87 }
88 surface->flags = RADEON_SURF_SET(array_mode, MODE);
89
90 switch (pCreateInfo->imageType){
91 case VK_IMAGE_TYPE_1D:
92 if (pCreateInfo->arrayLayers > 1)
93 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
94 else
95 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
96 break;
97 case VK_IMAGE_TYPE_2D:
98 if (pCreateInfo->arrayLayers > 1)
99 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
100 else
101 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
102 break;
103 case VK_IMAGE_TYPE_3D:
104 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
105 break;
106 default:
107 unreachable("unhandled image type");
108 }
109
110 if (is_depth) {
111 surface->flags |= RADEON_SURF_ZBUFFER;
112 if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
113 !(pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
114 VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) &&
115 pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
116 pCreateInfo->mipLevels <= 1 &&
117 device->physical_device->rad_info.chip_class >= VI &&
118 ((pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
119 /* for some reason TC compat with 2/4/8 samples breaks some cts tests - disable for now */
120 (pCreateInfo->samples < 2 && pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)) ||
121 (device->physical_device->rad_info.chip_class >= GFX9 &&
122 pCreateInfo->format == VK_FORMAT_D16_UNORM)))
123 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
124 }
125
126 if (is_stencil)
127 surface->flags |= RADEON_SURF_SBUFFER;
128
129 surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
130
131 bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
132 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
133 const struct VkImageFormatListCreateInfoKHR *format_list =
134 (const struct VkImageFormatListCreateInfoKHR *)
135 vk_find_struct_const(pCreateInfo->pNext,
136 IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
137
138 /* We have to ignore the existence of the list if viewFormatCount = 0 */
139 if (format_list && format_list->viewFormatCount) {
140 /* compatibility is transitive, so we only need to check
141 * one format with everything else. */
142 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
143 if (!radv_dcc_formats_compatible(pCreateInfo->format,
144 format_list->pViewFormats[i]))
145 dcc_compatible_formats = false;
146 }
147 } else {
148 dcc_compatible_formats = false;
149 }
150 }
151
152 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
153 (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
154 !dcc_compatible_formats ||
155 (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
156 pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
157 device->physical_device->rad_info.chip_class < VI ||
158 create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC) ||
159 pCreateInfo->samples >= 2)
160 surface->flags |= RADEON_SURF_DISABLE_DCC;
161 if (create_info->scanout)
162 surface->flags |= RADEON_SURF_SCANOUT;
163 return 0;
164 }
165
si_get_bo_metadata_word1(struct radv_device * device)166 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
167 {
168 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
169 }
170
171 static inline unsigned
si_tile_mode_index(const struct radv_image * image,unsigned level,bool stencil)172 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
173 {
174 if (stencil)
175 return image->surface.u.legacy.stencil_tiling_index[level];
176 else
177 return image->surface.u.legacy.tiling_index[level];
178 }
179
radv_map_swizzle(unsigned swizzle)180 static unsigned radv_map_swizzle(unsigned swizzle)
181 {
182 switch (swizzle) {
183 case VK_SWIZZLE_Y:
184 return V_008F0C_SQ_SEL_Y;
185 case VK_SWIZZLE_Z:
186 return V_008F0C_SQ_SEL_Z;
187 case VK_SWIZZLE_W:
188 return V_008F0C_SQ_SEL_W;
189 case VK_SWIZZLE_0:
190 return V_008F0C_SQ_SEL_0;
191 case VK_SWIZZLE_1:
192 return V_008F0C_SQ_SEL_1;
193 default: /* VK_SWIZZLE_X */
194 return V_008F0C_SQ_SEL_X;
195 }
196 }
197
198 static void
radv_make_buffer_descriptor(struct radv_device * device,struct radv_buffer * buffer,VkFormat vk_format,unsigned offset,unsigned range,uint32_t * state)199 radv_make_buffer_descriptor(struct radv_device *device,
200 struct radv_buffer *buffer,
201 VkFormat vk_format,
202 unsigned offset,
203 unsigned range,
204 uint32_t *state)
205 {
206 const struct vk_format_description *desc;
207 unsigned stride;
208 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
209 uint64_t va = gpu_address + buffer->offset;
210 unsigned num_format, data_format;
211 int first_non_void;
212 desc = vk_format_description(vk_format);
213 first_non_void = vk_format_get_first_non_void_channel(vk_format);
214 stride = desc->block.bits / 8;
215
216 num_format = radv_translate_buffer_numformat(desc, first_non_void);
217 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
218
219 va += offset;
220 state[0] = va;
221 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
222 S_008F04_STRIDE(stride);
223
224 if (device->physical_device->rad_info.chip_class != VI && stride) {
225 range /= stride;
226 }
227
228 state[2] = range;
229 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
230 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
231 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
232 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) |
233 S_008F0C_NUM_FORMAT(num_format) |
234 S_008F0C_DATA_FORMAT(data_format);
235 }
236
237 static void
si_set_mutable_tex_desc_fields(struct radv_device * device,struct radv_image * image,const struct legacy_surf_level * base_level_info,unsigned base_level,unsigned first_level,unsigned block_width,bool is_stencil,bool is_storage_image,uint32_t * state)238 si_set_mutable_tex_desc_fields(struct radv_device *device,
239 struct radv_image *image,
240 const struct legacy_surf_level *base_level_info,
241 unsigned base_level, unsigned first_level,
242 unsigned block_width, bool is_stencil,
243 bool is_storage_image, uint32_t *state)
244 {
245 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
246 uint64_t va = gpu_address;
247 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
248 uint64_t meta_va = 0;
249 if (chip_class >= GFX9) {
250 if (is_stencil)
251 va += image->surface.u.gfx9.stencil_offset;
252 else
253 va += image->surface.u.gfx9.surf_offset;
254 } else
255 va += base_level_info->offset;
256
257 state[0] = va >> 8;
258 if (chip_class >= GFX9 ||
259 base_level_info->mode == RADEON_SURF_MODE_2D)
260 state[0] |= image->surface.tile_swizzle;
261 state[1] &= C_008F14_BASE_ADDRESS_HI;
262 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
263
264 if (chip_class >= VI) {
265 state[6] &= C_008F28_COMPRESSION_EN;
266 state[7] = 0;
267 if (!is_storage_image && radv_vi_dcc_enabled(image, first_level)) {
268 meta_va = gpu_address + image->dcc_offset;
269 if (chip_class <= VI)
270 meta_va += base_level_info->dcc_offset;
271 } else if(!is_storage_image && image->tc_compatible_htile &&
272 image->surface.htile_size) {
273 meta_va = gpu_address + image->htile_offset;
274 }
275
276 if (meta_va) {
277 state[6] |= S_008F28_COMPRESSION_EN(1);
278 state[7] = meta_va >> 8;
279 state[7] |= image->surface.tile_swizzle;
280 }
281 }
282
283 if (chip_class >= GFX9) {
284 state[3] &= C_008F1C_SW_MODE;
285 state[4] &= C_008F20_PITCH_GFX9;
286
287 if (is_stencil) {
288 state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
289 state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
290 } else {
291 state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
292 state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
293 }
294
295 state[5] &= C_008F24_META_DATA_ADDRESS &
296 C_008F24_META_PIPE_ALIGNED &
297 C_008F24_META_RB_ALIGNED;
298 if (meta_va) {
299 struct gfx9_surf_meta_flags meta;
300
301 if (image->dcc_offset)
302 meta = image->surface.u.gfx9.dcc;
303 else
304 meta = image->surface.u.gfx9.htile;
305
306 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
307 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
308 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
309 }
310 } else {
311 /* SI-CI-VI */
312 unsigned pitch = base_level_info->nblk_x * block_width;
313 unsigned index = si_tile_mode_index(image, base_level, is_stencil);
314
315 state[3] &= C_008F1C_TILING_INDEX;
316 state[3] |= S_008F1C_TILING_INDEX(index);
317 state[4] &= C_008F20_PITCH_GFX6;
318 state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
319 }
320 }
321
/**
 * Select the V_008F1C_SQ_RSRC_IMG_* resource type for a descriptor.
 *
 * Cube views become CUBE, or 2D_ARRAY for storage images. With `gfx9` set,
 * 1D images are handled as 2D. 3D images viewed as anything other than 3D
 * become 2D_ARRAY.
 */
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	const bool is_array = nr_layers > 1;
	const bool is_msaa = nr_samples > 1;

	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
	    view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
		if (is_storage_image)
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
		return V_008F1C_SQ_RSRC_IMG_CUBE;
	}

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;

	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		if (is_array)
			return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
		return V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (is_msaa)
			return is_array ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
					: V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		return is_array ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
				: V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
		return V_008F1C_SQ_RSRC_IMG_3D;
	default:
		unreachable("illegale image type");
	}
}
348
gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])349 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
350 {
351 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
352
353 if (swizzle[3] == VK_SWIZZLE_X) {
354 /* For the pre-defined border color values (white, opaque
355 * black, transparent black), the only thing that matters is
356 * that the alpha channel winds up in the correct place
357 * (because the RGB channels are all the same) so either of
358 * these enumerations will work.
359 */
360 if (swizzle[2] == VK_SWIZZLE_Y)
361 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
362 else
363 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
364 } else if (swizzle[0] == VK_SWIZZLE_X) {
365 if (swizzle[1] == VK_SWIZZLE_Y)
366 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
367 else
368 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
369 } else if (swizzle[1] == VK_SWIZZLE_X) {
370 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
371 } else if (swizzle[2] == VK_SWIZZLE_X) {
372 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
373 }
374
375 return bc_swizzle;
376 }
377
378 /**
379 * Build the sampler view descriptor for a texture.
380 */
381 static void
si_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,uint32_t * state,uint32_t * fmask_state)382 si_make_texture_descriptor(struct radv_device *device,
383 struct radv_image *image,
384 bool is_storage_image,
385 VkImageViewType view_type,
386 VkFormat vk_format,
387 const VkComponentMapping *mapping,
388 unsigned first_level, unsigned last_level,
389 unsigned first_layer, unsigned last_layer,
390 unsigned width, unsigned height, unsigned depth,
391 uint32_t *state,
392 uint32_t *fmask_state)
393 {
394 const struct vk_format_description *desc;
395 enum vk_swizzle swizzle[4];
396 int first_non_void;
397 unsigned num_format, data_format, type;
398
399 desc = vk_format_description(vk_format);
400
401 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
402 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
403 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
404 } else {
405 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
406 }
407
408 first_non_void = vk_format_get_first_non_void_channel(vk_format);
409
410 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
411 if (num_format == ~0) {
412 num_format = 0;
413 }
414
415 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
416 if (data_format == ~0) {
417 data_format = 0;
418 }
419
420 /* S8 with either Z16 or Z32 HTILE need a special format. */
421 if (device->physical_device->rad_info.chip_class >= GFX9 &&
422 vk_format == VK_FORMAT_S8_UINT &&
423 image->tc_compatible_htile) {
424 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
425 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
426 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
427 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
428 }
429 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
430 is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
431 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
432 height = 1;
433 depth = image->info.array_size;
434 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
435 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
436 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
437 depth = image->info.array_size;
438 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
439 depth = image->info.array_size / 6;
440
441 state[0] = 0;
442 state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
443 S_008F14_NUM_FORMAT_GFX6(num_format));
444 state[2] = (S_008F18_WIDTH(width - 1) |
445 S_008F18_HEIGHT(height - 1) |
446 S_008F18_PERF_MOD(4));
447 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
448 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
449 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
450 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
451 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
452 0 : first_level) |
453 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
454 util_logbase2(image->info.samples) :
455 last_level) |
456 S_008F1C_TYPE(type));
457 state[4] = 0;
458 state[5] = S_008F24_BASE_ARRAY(first_layer);
459 state[6] = 0;
460 state[7] = 0;
461
462 if (device->physical_device->rad_info.chip_class >= GFX9) {
463 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
464
465 /* Depth is the the last accessible layer on Gfx9.
466 * The hw doesn't need to know the total number of layers.
467 */
468 if (type == V_008F1C_SQ_RSRC_IMG_3D)
469 state[4] |= S_008F20_DEPTH(depth - 1);
470 else
471 state[4] |= S_008F20_DEPTH(last_layer);
472
473 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
474 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
475 util_logbase2(image->info.samples) :
476 image->info.levels - 1);
477 } else {
478 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
479 state[4] |= S_008F20_DEPTH(depth - 1);
480 state[5] |= S_008F24_LAST_ARRAY(last_layer);
481 }
482 if (image->dcc_offset) {
483 unsigned swap = radv_translate_colorswap(vk_format, FALSE);
484
485 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
486 } else {
487 /* The last dword is unused by hw. The shader uses it to clear
488 * bits in the first dword of sampler state.
489 */
490 if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
491 if (first_level == last_level)
492 state[7] = C_008F30_MAX_ANISO_RATIO;
493 else
494 state[7] = 0xffffffff;
495 }
496 }
497
498 /* Initialize the sampler view for FMASK. */
499 if (image->fmask.size) {
500 uint32_t fmask_format, num_format;
501 uint64_t gpu_address = radv_buffer_get_va(image->bo);
502 uint64_t va;
503
504 va = gpu_address + image->offset + image->fmask.offset;
505
506 if (device->physical_device->rad_info.chip_class >= GFX9) {
507 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
508 switch (image->info.samples) {
509 case 2:
510 num_format = V_008F14_IMG_FMASK_8_2_2;
511 break;
512 case 4:
513 num_format = V_008F14_IMG_FMASK_8_4_4;
514 break;
515 case 8:
516 num_format = V_008F14_IMG_FMASK_32_8_8;
517 break;
518 default:
519 unreachable("invalid nr_samples");
520 }
521 } else {
522 switch (image->info.samples) {
523 case 2:
524 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
525 break;
526 case 4:
527 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
528 break;
529 case 8:
530 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
531 break;
532 default:
533 assert(0);
534 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
535 }
536 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
537 }
538
539 fmask_state[0] = va >> 8;
540 fmask_state[0] |= image->fmask.tile_swizzle;
541 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
542 S_008F14_DATA_FORMAT_GFX6(fmask_format) |
543 S_008F14_NUM_FORMAT_GFX6(num_format);
544 fmask_state[2] = S_008F18_WIDTH(width - 1) |
545 S_008F18_HEIGHT(height - 1);
546 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
547 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
548 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
549 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
550 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
551 fmask_state[4] = 0;
552 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
553 fmask_state[6] = 0;
554 fmask_state[7] = 0;
555
556 if (device->physical_device->rad_info.chip_class >= GFX9) {
557 fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
558 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
559 S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
560 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
561 S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
562 } else {
563 fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
564 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
565 S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
566 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
567 }
568 } else if (fmask_state)
569 memset(fmask_state, 0, 8 * 4);
570 }
571
572 static void
radv_query_opaque_metadata(struct radv_device * device,struct radv_image * image,struct radeon_bo_metadata * md)573 radv_query_opaque_metadata(struct radv_device *device,
574 struct radv_image *image,
575 struct radeon_bo_metadata *md)
576 {
577 static const VkComponentMapping fixedmapping;
578 uint32_t desc[8], i;
579
580 /* Metadata image format format version 1:
581 * [0] = 1 (metadata format identifier)
582 * [1] = (VENDOR_ID << 16) | PCI_ID
583 * [2:9] = image descriptor for the whole resource
584 * [2] is always 0, because the base address is cleared
585 * [9] is the DCC offset bits [39:8] from the beginning of
586 * the buffer
587 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
588 */
589 md->metadata[0] = 1; /* metadata image format version 1 */
590
591 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
592 md->metadata[1] = si_get_bo_metadata_word1(device);
593
594
595 si_make_texture_descriptor(device, image, false,
596 (VkImageViewType)image->type, image->vk_format,
597 &fixedmapping, 0, image->info.levels - 1, 0,
598 image->info.array_size,
599 image->info.width, image->info.height,
600 image->info.depth,
601 desc, NULL);
602
603 si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
604 image->surface.blk_w, false, false, desc);
605
606 /* Clear the base address and set the relative DCC offset. */
607 desc[0] = 0;
608 desc[1] &= C_008F14_BASE_ADDRESS_HI;
609 desc[7] = image->dcc_offset >> 8;
610
611 /* Dwords [2:9] contain the image descriptor. */
612 memcpy(&md->metadata[2], desc, sizeof(desc));
613
614 /* Dwords [10:..] contain the mipmap level offsets. */
615 if (device->physical_device->rad_info.chip_class <= VI) {
616 for (i = 0; i <= image->info.levels - 1; i++)
617 md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
618 md->size_metadata = (11 + image->info.levels - 1) * 4;
619 }
620 }
621
622 void
radv_init_metadata(struct radv_device * device,struct radv_image * image,struct radeon_bo_metadata * metadata)623 radv_init_metadata(struct radv_device *device,
624 struct radv_image *image,
625 struct radeon_bo_metadata *metadata)
626 {
627 struct radeon_surf *surface = &image->surface;
628
629 memset(metadata, 0, sizeof(*metadata));
630
631 if (device->physical_device->rad_info.chip_class >= GFX9) {
632 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
633 } else {
634 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
635 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
636 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
637 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
638 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
639 metadata->u.legacy.bankw = surface->u.legacy.bankw;
640 metadata->u.legacy.bankh = surface->u.legacy.bankh;
641 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
642 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
643 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
644 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
645 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
646 }
647 radv_query_opaque_metadata(device, image, metadata);
648 }
649
650 /* The number of samples can be specified independently of the texture. */
651 static void
radv_image_get_fmask_info(struct radv_device * device,struct radv_image * image,unsigned nr_samples,struct radv_fmask_info * out)652 radv_image_get_fmask_info(struct radv_device *device,
653 struct radv_image *image,
654 unsigned nr_samples,
655 struct radv_fmask_info *out)
656 {
657 /* FMASK is allocated like an ordinary texture. */
658 struct radeon_surf fmask = {};
659 struct ac_surf_info info = image->info;
660 memset(out, 0, sizeof(*out));
661
662 if (device->physical_device->rad_info.chip_class >= GFX9) {
663 out->alignment = image->surface.u.gfx9.fmask_alignment;
664 out->size = image->surface.u.gfx9.fmask_size;
665 return;
666 }
667
668 fmask.blk_w = image->surface.blk_w;
669 fmask.blk_h = image->surface.blk_h;
670 info.samples = 1;
671 fmask.flags = image->surface.flags | RADEON_SURF_FMASK;
672
673 if (!image->shareable)
674 info.surf_index = &device->fmask_mrt_offset_counter;
675
676 /* Force 2D tiling if it wasn't set. This may occur when creating
677 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
678 * destination buffer must have an FMASK too. */
679 fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
680 fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
681
682 switch (nr_samples) {
683 case 2:
684 case 4:
685 fmask.bpe = 1;
686 break;
687 case 8:
688 fmask.bpe = 4;
689 break;
690 default:
691 return;
692 }
693
694 device->ws->surface_init(device->ws, &info, &fmask);
695 assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
696
697 out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
698 if (out->slice_tile_max)
699 out->slice_tile_max -= 1;
700
701 out->tile_mode_index = fmask.u.legacy.tiling_index[0];
702 out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
703 out->bank_height = fmask.u.legacy.bankh;
704 out->tile_swizzle = fmask.tile_swizzle;
705 out->alignment = MAX2(256, fmask.surf_alignment);
706 out->size = fmask.surf_size;
707
708 assert(!out->tile_swizzle || !image->shareable);
709 }
710
711 static void
radv_image_alloc_fmask(struct radv_device * device,struct radv_image * image)712 radv_image_alloc_fmask(struct radv_device *device,
713 struct radv_image *image)
714 {
715 radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);
716
717 image->fmask.offset = align64(image->size, image->fmask.alignment);
718 image->size = image->fmask.offset + image->fmask.size;
719 image->alignment = MAX2(image->alignment, image->fmask.alignment);
720 }
721
722 static void
radv_image_get_cmask_info(struct radv_device * device,struct radv_image * image,struct radv_cmask_info * out)723 radv_image_get_cmask_info(struct radv_device *device,
724 struct radv_image *image,
725 struct radv_cmask_info *out)
726 {
727 unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
728 unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
729 unsigned cl_width, cl_height;
730
731 if (device->physical_device->rad_info.chip_class >= GFX9) {
732 out->alignment = image->surface.u.gfx9.cmask_alignment;
733 out->size = image->surface.u.gfx9.cmask_size;
734 return;
735 }
736
737 switch (num_pipes) {
738 case 2:
739 cl_width = 32;
740 cl_height = 16;
741 break;
742 case 4:
743 cl_width = 32;
744 cl_height = 32;
745 break;
746 case 8:
747 cl_width = 64;
748 cl_height = 32;
749 break;
750 case 16: /* Hawaii */
751 cl_width = 64;
752 cl_height = 64;
753 break;
754 default:
755 assert(0);
756 return;
757 }
758
759 unsigned base_align = num_pipes * pipe_interleave_bytes;
760
761 unsigned width = align(image->info.width, cl_width*8);
762 unsigned height = align(image->info.height, cl_height*8);
763 unsigned slice_elements = (width * height) / (8*8);
764
765 /* Each element of CMASK is a nibble. */
766 unsigned slice_bytes = slice_elements / 2;
767
768 out->slice_tile_max = (width * height) / (128*128);
769 if (out->slice_tile_max)
770 out->slice_tile_max -= 1;
771
772 out->alignment = MAX2(256, base_align);
773 out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) *
774 align(slice_bytes, base_align);
775 }
776
777 static void
radv_image_alloc_cmask(struct radv_device * device,struct radv_image * image)778 radv_image_alloc_cmask(struct radv_device *device,
779 struct radv_image *image)
780 {
781 uint32_t clear_value_size = 0;
782 radv_image_get_cmask_info(device, image, &image->cmask);
783
784 image->cmask.offset = align64(image->size, image->cmask.alignment);
785 /* + 8 for storing the clear values */
786 if (!image->clear_value_offset) {
787 image->clear_value_offset = image->cmask.offset + image->cmask.size;
788 clear_value_size = 8;
789 }
790 image->size = image->cmask.offset + image->cmask.size + clear_value_size;
791 image->alignment = MAX2(image->alignment, image->cmask.alignment);
792 }
793
794 static void
radv_image_alloc_dcc(struct radv_image * image)795 radv_image_alloc_dcc(struct radv_image *image)
796 {
797 image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
798 /* + 16 for storing the clear values + dcc pred */
799 image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
800 image->dcc_pred_offset = image->clear_value_offset + 8;
801 image->size = image->dcc_offset + image->surface.dcc_size + 16;
802 image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
803 }
804
805 static void
radv_image_alloc_htile(struct radv_image * image)806 radv_image_alloc_htile(struct radv_image *image)
807 {
808 image->htile_offset = align64(image->size, image->surface.htile_alignment);
809
810 /* + 8 for storing the clear values */
811 image->clear_value_offset = image->htile_offset + image->surface.htile_size;
812 image->size = image->clear_value_offset + 8;
813 image->alignment = align64(image->alignment, image->surface.htile_alignment);
814 }
815
816 static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image * image)817 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
818 {
819 if (image->info.samples <= 1 &&
820 image->info.width * image->info.height <= 512 * 512) {
821 /* Do not enable CMASK or DCC for small surfaces where the cost
822 * of the eliminate pass can be higher than the benefit of fast
823 * clear. RadeonSI does this, but the image threshold is
824 * different.
825 */
826 return false;
827 }
828
829 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
830 (image->exclusive || image->queue_family_mask == 1);
831 }
832
833 static inline bool
radv_image_can_enable_dcc(struct radv_image * image)834 radv_image_can_enable_dcc(struct radv_image *image)
835 {
836 return radv_image_can_enable_dcc_or_cmask(image) &&
837 image->surface.dcc_size;
838 }
839
840 static inline bool
radv_image_can_enable_cmask(struct radv_image * image)841 radv_image_can_enable_cmask(struct radv_image *image)
842 {
843 if (image->surface.bpe > 8 && image->info.samples == 1) {
844 /* Do not enable CMASK for non-MSAA images (fast color clear)
845 * because 128 bit formats are not supported, but FMASK might
846 * still be used.
847 */
848 return false;
849 }
850
851 return radv_image_can_enable_dcc_or_cmask(image) &&
852 image->info.levels == 1 &&
853 image->info.depth == 1 &&
854 !image->surface.is_linear;
855 }
856
857 static inline bool
radv_image_can_enable_fmask(struct radv_image * image)858 radv_image_can_enable_fmask(struct radv_image *image)
859 {
860 return image->info.samples > 1 && vk_format_is_color(image->vk_format);
861 }
862
863 static inline bool
radv_image_can_enable_htile(struct radv_image * image)864 radv_image_can_enable_htile(struct radv_image *image)
865 {
866 return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
867 }
868
869 VkResult
radv_image_create(VkDevice _device,const struct radv_image_create_info * create_info,const VkAllocationCallbacks * alloc,VkImage * pImage)870 radv_image_create(VkDevice _device,
871 const struct radv_image_create_info *create_info,
872 const VkAllocationCallbacks* alloc,
873 VkImage *pImage)
874 {
875 RADV_FROM_HANDLE(radv_device, device, _device);
876 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
877 struct radv_image *image = NULL;
878 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
879
880 radv_assert(pCreateInfo->mipLevels > 0);
881 radv_assert(pCreateInfo->arrayLayers > 0);
882 radv_assert(pCreateInfo->samples > 0);
883 radv_assert(pCreateInfo->extent.width > 0);
884 radv_assert(pCreateInfo->extent.height > 0);
885 radv_assert(pCreateInfo->extent.depth > 0);
886
887 image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
888 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
889 if (!image)
890 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
891
892 image->type = pCreateInfo->imageType;
893 image->info.width = pCreateInfo->extent.width;
894 image->info.height = pCreateInfo->extent.height;
895 image->info.depth = pCreateInfo->extent.depth;
896 image->info.samples = pCreateInfo->samples;
897 image->info.array_size = pCreateInfo->arrayLayers;
898 image->info.levels = pCreateInfo->mipLevels;
899
900 image->vk_format = pCreateInfo->format;
901 image->tiling = pCreateInfo->tiling;
902 image->usage = pCreateInfo->usage;
903 image->flags = pCreateInfo->flags;
904
905 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
906 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
907 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
908 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR)
909 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
910 else
911 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
912 }
913
914 image->shareable = vk_find_struct_const(pCreateInfo->pNext,
915 EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
916 if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
917 image->info.surf_index = &device->image_mrt_offset_counter;
918 }
919
920 radv_init_surface(device, &image->surface, create_info);
921
922 device->ws->surface_init(device->ws, &image->info, &image->surface);
923
924 image->size = image->surface.surf_size;
925 image->alignment = image->surface.surf_alignment;
926
927 if (!create_info->no_metadata_planes) {
928 /* Try to enable DCC first. */
929 if (radv_image_can_enable_dcc(image)) {
930 radv_image_alloc_dcc(image);
931 } else {
932 /* When DCC cannot be enabled, try CMASK. */
933 image->surface.dcc_size = 0;
934 if (radv_image_can_enable_cmask(image)) {
935 radv_image_alloc_cmask(device, image);
936 }
937 }
938
939 /* Try to enable FMASK for multisampled images. */
940 if (radv_image_can_enable_fmask(image)) {
941 radv_image_alloc_fmask(device, image);
942 } else {
943 /* Otherwise, try to enable HTILE for depth surfaces. */
944 if (radv_image_can_enable_htile(image) &&
945 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
946 radv_image_alloc_htile(image);
947 image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
948 } else {
949 image->surface.htile_size = 0;
950 }
951 }
952 } else {
953 image->surface.dcc_size = 0;
954 image->surface.htile_size = 0;
955 }
956
957 if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
958 image->alignment = MAX2(image->alignment, 4096);
959 image->size = align64(image->size, image->alignment);
960 image->offset = 0;
961
962 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
963 0, RADEON_FLAG_VIRTUAL);
964 if (!image->bo) {
965 vk_free2(&device->alloc, alloc, image);
966 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
967 }
968 }
969
970 *pImage = radv_image_to_handle(image);
971
972 return VK_SUCCESS;
973 }
974
975 static void
radv_image_view_make_descriptor(struct radv_image_view * iview,struct radv_device * device,const VkComponentMapping * components,bool is_storage_image)976 radv_image_view_make_descriptor(struct radv_image_view *iview,
977 struct radv_device *device,
978 const VkComponentMapping *components,
979 bool is_storage_image)
980 {
981 struct radv_image *image = iview->image;
982 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
983 uint32_t blk_w;
984 uint32_t *descriptor;
985 uint32_t hw_level = 0;
986
987 if (is_storage_image) {
988 descriptor = iview->storage_descriptor;
989 } else {
990 descriptor = iview->descriptor;
991 }
992
993 assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
994 blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);
995
996 if (device->physical_device->rad_info.chip_class >= GFX9)
997 hw_level = iview->base_mip;
998 si_make_texture_descriptor(device, image, is_storage_image,
999 iview->type,
1000 iview->vk_format,
1001 components,
1002 hw_level, hw_level + iview->level_count - 1,
1003 iview->base_layer,
1004 iview->base_layer + iview->layer_count - 1,
1005 iview->extent.width,
1006 iview->extent.height,
1007 iview->extent.depth,
1008 descriptor,
1009 descriptor + 8);
1010
1011 const struct legacy_surf_level *base_level_info = NULL;
1012 if (device->physical_device->rad_info.chip_class <= GFX9) {
1013 if (is_stencil)
1014 base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip];
1015 else
1016 base_level_info = &image->surface.u.legacy.level[iview->base_mip];
1017 }
1018 si_set_mutable_tex_desc_fields(device, image,
1019 base_level_info,
1020 iview->base_mip,
1021 iview->base_mip,
1022 blk_w, is_stencil, is_storage_image, descriptor);
1023 }
1024
1025 void
radv_image_view_init(struct radv_image_view * iview,struct radv_device * device,const VkImageViewCreateInfo * pCreateInfo)1026 radv_image_view_init(struct radv_image_view *iview,
1027 struct radv_device *device,
1028 const VkImageViewCreateInfo* pCreateInfo)
1029 {
1030 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1031 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1032
1033 switch (image->type) {
1034 case VK_IMAGE_TYPE_1D:
1035 case VK_IMAGE_TYPE_2D:
1036 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1037 break;
1038 case VK_IMAGE_TYPE_3D:
1039 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1040 <= radv_minify(image->info.depth, range->baseMipLevel));
1041 break;
1042 default:
1043 unreachable("bad VkImageType");
1044 }
1045 iview->image = image;
1046 iview->bo = image->bo;
1047 iview->type = pCreateInfo->viewType;
1048 iview->vk_format = pCreateInfo->format;
1049 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1050
1051 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1052 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1053 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1054 iview->vk_format = vk_format_depth_only(iview->vk_format);
1055 }
1056
1057 if (device->physical_device->rad_info.chip_class >= GFX9) {
1058 iview->extent = (VkExtent3D) {
1059 .width = image->info.width,
1060 .height = image->info.height,
1061 .depth = image->info.depth,
1062 };
1063 } else {
1064 iview->extent = (VkExtent3D) {
1065 .width = radv_minify(image->info.width , range->baseMipLevel),
1066 .height = radv_minify(image->info.height, range->baseMipLevel),
1067 .depth = radv_minify(image->info.depth , range->baseMipLevel),
1068 };
1069 }
1070
1071 if (iview->vk_format != image->vk_format) {
1072 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1073 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1074 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1075 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1076
1077 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
1078 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
1079
1080 /* Comment ported from amdvlk -
1081 * If we have the following image:
1082 * Uncompressed pixels Compressed block sizes (4x4)
1083 * mip0: 22 x 22 6 x 6
1084 * mip1: 11 x 11 3 x 3
1085 * mip2: 5 x 5 2 x 2
1086 * mip3: 2 x 2 1 x 1
1087 * mip4: 1 x 1 1 x 1
1088 *
1089 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1090 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1091 * divide-by-two integer math):
1092 * mip0: 6x6
1093 * mip1: 3x3
1094 * mip2: 1x1
1095 * mip3: 1x1
1096 *
1097 * This means that mip2 will be missing texels.
1098 *
1099 * Fix this by calculating the base mip's width and height, then convert that, and round it
1100 * back up to get the level 0 size.
1101 * Clamp the converted size between the original values, and next power of two, which
1102 * means we don't oversize the image.
1103 */
1104 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1105 vk_format_is_compressed(image->vk_format) &&
1106 !vk_format_is_compressed(iview->vk_format)) {
1107 unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
1108 unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
1109 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
1110 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
1111
1112 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
1113 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
1114
1115 lvl_width <<= range->baseMipLevel;
1116 lvl_height <<= range->baseMipLevel;
1117
1118 iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
1119 iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
1120 }
1121 }
1122
1123 iview->base_layer = range->baseArrayLayer;
1124 iview->layer_count = radv_get_layerCount(image, range);
1125 iview->base_mip = range->baseMipLevel;
1126 iview->level_count = radv_get_levelCount(image, range);
1127
1128 radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false);
1129 radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true);
1130 }
1131
/*
 * Report whether HTILE applies to the image in the given layout.
 *
 * Images without HTILE data never qualify. With TC-compatible HTILE every
 * layout except GENERAL qualifies. Otherwise only the depth/stencil
 * attachment and transfer destination layouts qualify, and only when the
 * queue mask is exactly the general queue.
 */
bool radv_layout_has_htile(const struct radv_image *image,
                           VkImageLayout layout,
                           unsigned queue_mask)
{
	if (!image->surface.htile_size)
		return false;

	if (image->tc_compatible_htile)
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	switch (layout) {
	case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
	case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
		return queue_mask == (1u << RADV_QUEUE_GENERAL);
	default:
		return false;
	}
}
1144
/*
 * Report whether the image's HTILE is treated as compressed in the given
 * layout.
 *
 * With TC-compatible HTILE every layout except GENERAL qualifies. Otherwise
 * the image must have HTILE data, the layout must be the depth/stencil
 * attachment or transfer destination layout, and the queue mask must be
 * exactly the general queue.
 */
bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     unsigned queue_mask)
{
	const bool has_htile = image->surface.htile_size != 0;

	if (has_htile && image->tc_compatible_htile)
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	const bool compressible_layout =
		layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	const bool general_queue_only = queue_mask == (1u << RADV_QUEUE_GENERAL);

	return has_htile && compressible_layout && general_queue_only;
}
1157
/*
 * Fast clears are allowed only in the color attachment layout and only when
 * the queue mask is exactly the general queue. The image itself is not
 * consulted.
 */
bool radv_layout_can_fast_clear(const struct radv_image *image,
			        VkImageLayout layout,
			        unsigned queue_mask)
{
	if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
		return false;

	return queue_mask == (1u << RADV_QUEUE_GENERAL);
}
1165
/*
 * Report whether DCC stays compressed for the image in the given layout.
 *
 * Requires DCC data on the surface and any layout other than GENERAL, with
 * one exception: transfer destinations reachable from the compute queue.
 */
bool radv_layout_dcc_compressed(const struct radv_image *image,
			        VkImageLayout layout,
			        unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	const bool compute_transfer_dst =
		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		(queue_mask & (1u << RADV_QUEUE_COMPUTE));

	if (compute_transfer_dst)
		return false;

	if (!image->surface.dcc_size)
		return false;

	return layout != VK_IMAGE_LAYOUT_GENERAL;
}
1177
1178
radv_image_queue_family_mask(const struct radv_image * image,uint32_t family,uint32_t queue_family)1179 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1180 {
1181 if (!image->exclusive)
1182 return image->queue_family_mask;
1183 if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR)
1184 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1185 if (family == VK_QUEUE_FAMILY_IGNORED)
1186 return 1u << queue_family;
1187 return 1u << family;
1188 }
1189
1190 VkResult
radv_CreateImage(VkDevice device,const VkImageCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImage * pImage)1191 radv_CreateImage(VkDevice device,
1192 const VkImageCreateInfo *pCreateInfo,
1193 const VkAllocationCallbacks *pAllocator,
1194 VkImage *pImage)
1195 {
1196 #ifdef ANDROID
1197 const VkNativeBufferANDROID *gralloc_info =
1198 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1199
1200 if (gralloc_info)
1201 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1202 pAllocator, pImage);
1203 #endif
1204
1205 const struct wsi_image_create_info *wsi_info =
1206 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1207 bool scanout = wsi_info && wsi_info->scanout;
1208
1209 return radv_image_create(device,
1210 &(struct radv_image_create_info) {
1211 .vk_info = pCreateInfo,
1212 .scanout = scanout,
1213 },
1214 pAllocator,
1215 pImage);
1216 }
1217
1218 void
radv_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)1219 radv_DestroyImage(VkDevice _device, VkImage _image,
1220 const VkAllocationCallbacks *pAllocator)
1221 {
1222 RADV_FROM_HANDLE(radv_device, device, _device);
1223 RADV_FROM_HANDLE(radv_image, image, _image);
1224
1225 if (!image)
1226 return;
1227
1228 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1229 device->ws->buffer_destroy(image->bo);
1230
1231 if (image->owned_memory != VK_NULL_HANDLE)
1232 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1233
1234 vk_free2(&device->alloc, pAllocator, image);
1235 }
1236
/*
 * vkGetImageSubresourceLayout entry point.
 *
 * Fills pLayout with the offset, pitches and size of the requested mip level
 * and array layer. GFX9+ reads the gfx9 surface layout; older chips read the
 * per-level legacy layout. For 3D images the size covers every depth slice
 * of the level.
 */
void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_surf *surface = &image->surface;
	const int level = pSubresource->mipLevel;
	const int layer = pSubresource->arrayLayer;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
		pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
	} else {
		const struct legacy_surf_level *lvl = &surface->u.legacy.level[level];
		/* The slice size is stored in dwords; convert it to bytes. */
		const uint64_t slice_bytes = (uint64_t)lvl->slice_size_dw * 4;

		pLayout->offset = lvl->offset + slice_bytes * layer;
		pLayout->rowPitch = lvl->nblk_x * surface->bpe;
		pLayout->arrayPitch = slice_bytes;
		pLayout->depthPitch = slice_bytes;
		pLayout->size = slice_bytes;
	}

	/* A 3D subresource spans all depth slices of its mip level. */
	if (image->type == VK_IMAGE_TYPE_3D)
		pLayout->size *= u_minify(image->info.depth, level);
}
1267
1268
1269 VkResult
radv_CreateImageView(VkDevice _device,const VkImageViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImageView * pView)1270 radv_CreateImageView(VkDevice _device,
1271 const VkImageViewCreateInfo *pCreateInfo,
1272 const VkAllocationCallbacks *pAllocator,
1273 VkImageView *pView)
1274 {
1275 RADV_FROM_HANDLE(radv_device, device, _device);
1276 struct radv_image_view *view;
1277
1278 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1279 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1280 if (view == NULL)
1281 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1282
1283 radv_image_view_init(view, device, pCreateInfo);
1284
1285 *pView = radv_image_view_to_handle(view);
1286
1287 return VK_SUCCESS;
1288 }
1289
1290 void
radv_DestroyImageView(VkDevice _device,VkImageView _iview,const VkAllocationCallbacks * pAllocator)1291 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1292 const VkAllocationCallbacks *pAllocator)
1293 {
1294 RADV_FROM_HANDLE(radv_device, device, _device);
1295 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1296
1297 if (!iview)
1298 return;
1299 vk_free2(&device->alloc, pAllocator, iview);
1300 }
1301
radv_buffer_view_init(struct radv_buffer_view * view,struct radv_device * device,const VkBufferViewCreateInfo * pCreateInfo)1302 void radv_buffer_view_init(struct radv_buffer_view *view,
1303 struct radv_device *device,
1304 const VkBufferViewCreateInfo* pCreateInfo)
1305 {
1306 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1307
1308 view->bo = buffer->bo;
1309 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1310 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1311 view->vk_format = pCreateInfo->format;
1312
1313 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1314 pCreateInfo->offset, view->range, view->state);
1315 }
1316
1317 VkResult
radv_CreateBufferView(VkDevice _device,const VkBufferViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBufferView * pView)1318 radv_CreateBufferView(VkDevice _device,
1319 const VkBufferViewCreateInfo *pCreateInfo,
1320 const VkAllocationCallbacks *pAllocator,
1321 VkBufferView *pView)
1322 {
1323 RADV_FROM_HANDLE(radv_device, device, _device);
1324 struct radv_buffer_view *view;
1325
1326 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1327 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1328 if (!view)
1329 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1330
1331 radv_buffer_view_init(view, device, pCreateInfo);
1332
1333 *pView = radv_buffer_view_to_handle(view);
1334
1335 return VK_SUCCESS;
1336 }
1337
1338 void
radv_DestroyBufferView(VkDevice _device,VkBufferView bufferView,const VkAllocationCallbacks * pAllocator)1339 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1340 const VkAllocationCallbacks *pAllocator)
1341 {
1342 RADV_FROM_HANDLE(radv_device, device, _device);
1343 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1344
1345 if (!view)
1346 return;
1347
1348 vk_free2(&device->alloc, pAllocator, view);
1349 }
1350