1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 #include "radv_meta.h"
25 #include "nir/nir_builder.h"
26 
27 /*
28  * GFX queue: Compute shader implementation of image->buffer copy
29  * Compute queue: implementation also of buffer->image, image->image, and image clear.
30  */
31 
32 /* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
33  * for that.
34  */
35 static nir_shader *
build_nir_itob_compute_shader(struct radv_device * dev,bool is_3d)36 build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
37 {
38 	nir_builder b;
39 	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
40 	const struct glsl_type *sampler_type = glsl_sampler_type(dim,
41 								 false,
42 								 false,
43 								 GLSL_TYPE_FLOAT);
44 	const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
45 							   false,
46 							   GLSL_TYPE_FLOAT);
47 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
48 	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
49 	b.shader->info.cs.local_size[0] = 16;
50 	b.shader->info.cs.local_size[1] = 16;
51 	b.shader->info.cs.local_size[2] = 1;
52 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
53 						      sampler_type, "s_tex");
54 	input_img->data.descriptor_set = 0;
55 	input_img->data.binding = 0;
56 
57 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
58 						       img_type, "out_img");
59 	output_img->data.descriptor_set = 0;
60 	output_img->data.binding = 1;
61 
62 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
63 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
64 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
65 						b.shader->info.cs.local_size[0],
66 						b.shader->info.cs.local_size[1],
67 						b.shader->info.cs.local_size[2], 0);
68 
69 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
70 
71 
72 
73 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
74 	nir_intrinsic_set_base(offset, 0);
75 	nir_intrinsic_set_range(offset, 16);
76 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
77 	offset->num_components = is_3d ? 3 : 2;
78 	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
79 	nir_builder_instr_insert(&b, &offset->instr);
80 
81 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
82 	nir_intrinsic_set_base(stride, 0);
83 	nir_intrinsic_set_range(stride, 16);
84 	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
85 	stride->num_components = 1;
86 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
87 	nir_builder_instr_insert(&b, &stride->instr);
88 
89 	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
90 	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
91 
92 	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
93 	tex->sampler_dim = dim;
94 	tex->op = nir_texop_txf;
95 	tex->src[0].src_type = nir_tex_src_coord;
96 	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
97 	tex->src[1].src_type = nir_tex_src_lod;
98 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
99 	tex->src[2].src_type = nir_tex_src_texture_deref;
100 	tex->src[2].src = nir_src_for_ssa(input_img_deref);
101 	tex->dest_type = nir_type_float;
102 	tex->is_array = false;
103 	tex->coord_components = is_3d ? 3 : 2;
104 
105 	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
106 	nir_builder_instr_insert(&b, &tex->instr);
107 
108 	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
109 	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
110 
111 	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
112 	tmp = nir_iadd(&b, tmp, pos_x);
113 
114 	nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
115 
116 	nir_ssa_def *outval = &tex->dest.ssa;
117 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
118 	store->num_components = 4;
119 	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
120 	store->src[1] = nir_src_for_ssa(coord);
121 	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
122 	store->src[3] = nir_src_for_ssa(outval);
123 	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
124 
125 	nir_builder_instr_insert(&b, &store->instr);
126 	return b.shader;
127 }
128 
129 /* Image to buffer - don't write use image accessors */
130 static VkResult
radv_device_init_meta_itob_state(struct radv_device * device)131 radv_device_init_meta_itob_state(struct radv_device *device)
132 {
133 	VkResult result;
134 	struct radv_shader_module cs = { .nir = NULL };
135 	struct radv_shader_module cs_3d = { .nir = NULL };
136 
137 	cs.nir = build_nir_itob_compute_shader(device, false);
138 	if (device->physical_device->rad_info.chip_class >= GFX9)
139 		cs_3d.nir = build_nir_itob_compute_shader(device, true);
140 
141 	/*
142 	 * two descriptors one for the image being sampled
143 	 * one for the buffer being written.
144 	 */
145 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
146 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
147 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
148 		.bindingCount = 2,
149 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
150 			{
151 				.binding = 0,
152 				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
153 				.descriptorCount = 1,
154 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
155 				.pImmutableSamplers = NULL
156 			},
157 			{
158 				.binding = 1,
159 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
160 				.descriptorCount = 1,
161 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
162 				.pImmutableSamplers = NULL
163 			},
164 		}
165 	};
166 
167 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
168 						&ds_create_info,
169 						&device->meta_state.alloc,
170 						&device->meta_state.itob.img_ds_layout);
171 	if (result != VK_SUCCESS)
172 		goto fail;
173 
174 
175 	VkPipelineLayoutCreateInfo pl_create_info = {
176 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 		.setLayoutCount = 1,
178 		.pSetLayouts = &device->meta_state.itob.img_ds_layout,
179 		.pushConstantRangeCount = 1,
180 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
181 	};
182 
183 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
184 					  &pl_create_info,
185 					  &device->meta_state.alloc,
186 					  &device->meta_state.itob.img_p_layout);
187 	if (result != VK_SUCCESS)
188 		goto fail;
189 
190 	/* compute shader */
191 
192 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
193 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
194 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
195 		.module = radv_shader_module_to_handle(&cs),
196 		.pName = "main",
197 		.pSpecializationInfo = NULL,
198 	};
199 
200 	VkComputePipelineCreateInfo vk_pipeline_info = {
201 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
202 		.stage = pipeline_shader_stage,
203 		.flags = 0,
204 		.layout = device->meta_state.itob.img_p_layout,
205 	};
206 
207 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
208 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
209 					     1, &vk_pipeline_info, NULL,
210 					     &device->meta_state.itob.pipeline);
211 	if (result != VK_SUCCESS)
212 		goto fail;
213 
214 	if (device->physical_device->rad_info.chip_class >= GFX9) {
215 		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
216 			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
217 			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 			.module = radv_shader_module_to_handle(&cs_3d),
219 			.pName = "main",
220 			.pSpecializationInfo = NULL,
221 		};
222 
223 		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
224 			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
225 			.stage = pipeline_shader_stage_3d,
226 			.flags = 0,
227 			.layout = device->meta_state.itob.img_p_layout,
228 		};
229 
230 		result = radv_CreateComputePipelines(radv_device_to_handle(device),
231 						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
232 						     1, &vk_pipeline_info_3d, NULL,
233 						     &device->meta_state.itob.pipeline_3d);
234 		if (result != VK_SUCCESS)
235 			goto fail;
236 		ralloc_free(cs_3d.nir);
237 	}
238 	ralloc_free(cs.nir);
239 
240 	return VK_SUCCESS;
241 fail:
242 	ralloc_free(cs.nir);
243 	ralloc_free(cs_3d.nir);
244 	return result;
245 }
246 
247 static void
radv_device_finish_meta_itob_state(struct radv_device * device)248 radv_device_finish_meta_itob_state(struct radv_device *device)
249 {
250 	struct radv_meta_state *state = &device->meta_state;
251 
252 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
253 				   state->itob.img_p_layout, &state->alloc);
254 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
255 				        state->itob.img_ds_layout,
256 					&state->alloc);
257 	radv_DestroyPipeline(radv_device_to_handle(device),
258 			     state->itob.pipeline, &state->alloc);
259 	if (device->physical_device->rad_info.chip_class >= GFX9)
260 		radv_DestroyPipeline(radv_device_to_handle(device),
261 				     state->itob.pipeline_3d, &state->alloc);
262 }
263 
264 static nir_shader *
build_nir_btoi_compute_shader(struct radv_device * dev,bool is_3d)265 build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
266 {
267 	nir_builder b;
268 	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
269 	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
270 							     false,
271 							     false,
272 							     GLSL_TYPE_FLOAT);
273 	const struct glsl_type *img_type = glsl_image_type(dim,
274 							   false,
275 							   GLSL_TYPE_FLOAT);
276 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
277 	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
278 	b.shader->info.cs.local_size[0] = 16;
279 	b.shader->info.cs.local_size[1] = 16;
280 	b.shader->info.cs.local_size[2] = 1;
281 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
282 						      buf_type, "s_tex");
283 	input_img->data.descriptor_set = 0;
284 	input_img->data.binding = 0;
285 
286 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
287 						       img_type, "out_img");
288 	output_img->data.descriptor_set = 0;
289 	output_img->data.binding = 1;
290 
291 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
292 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
293 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
294 						b.shader->info.cs.local_size[0],
295 						b.shader->info.cs.local_size[1],
296 						b.shader->info.cs.local_size[2], 0);
297 
298 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
299 
300 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
301 	nir_intrinsic_set_base(offset, 0);
302 	nir_intrinsic_set_range(offset, 16);
303 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
304 	offset->num_components = is_3d ? 3 : 2;
305 	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
306 	nir_builder_instr_insert(&b, &offset->instr);
307 
308 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
309 	nir_intrinsic_set_base(stride, 0);
310 	nir_intrinsic_set_range(stride, 16);
311 	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
312 	stride->num_components = 1;
313 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
314 	nir_builder_instr_insert(&b, &stride->instr);
315 
316 	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
317 	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
318 
319 	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
320 	tmp = nir_iadd(&b, tmp, pos_x);
321 
322 	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
323 
324 	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
325 	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
326 
327 	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
328 	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
329 	tex->op = nir_texop_txf;
330 	tex->src[0].src_type = nir_tex_src_coord;
331 	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
332 	tex->src[1].src_type = nir_tex_src_lod;
333 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
334 	tex->src[2].src_type = nir_tex_src_texture_deref;
335 	tex->src[2].src = nir_src_for_ssa(input_img_deref);
336 	tex->dest_type = nir_type_float;
337 	tex->is_array = false;
338 	tex->coord_components = 1;
339 
340 	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
341 	nir_builder_instr_insert(&b, &tex->instr);
342 
343 	nir_ssa_def *outval = &tex->dest.ssa;
344 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
345 	store->num_components = 4;
346 	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
347 	store->src[1] = nir_src_for_ssa(img_coord);
348 	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
349 	store->src[3] = nir_src_for_ssa(outval);
350 	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
351 
352 	nir_builder_instr_insert(&b, &store->instr);
353 	return b.shader;
354 }
355 
356 /* Buffer to image - don't write use image accessors */
357 static VkResult
radv_device_init_meta_btoi_state(struct radv_device * device)358 radv_device_init_meta_btoi_state(struct radv_device *device)
359 {
360 	VkResult result;
361 	struct radv_shader_module cs = { .nir = NULL };
362 	struct radv_shader_module cs_3d = { .nir = NULL };
363 	cs.nir = build_nir_btoi_compute_shader(device, false);
364 	if (device->physical_device->rad_info.chip_class >= GFX9)
365 		cs_3d.nir = build_nir_btoi_compute_shader(device, true);
366 	/*
367 	 * two descriptors one for the image being sampled
368 	 * one for the buffer being written.
369 	 */
370 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
371 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
372 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
373 		.bindingCount = 2,
374 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
375 			{
376 				.binding = 0,
377 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
378 				.descriptorCount = 1,
379 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
380 				.pImmutableSamplers = NULL
381 			},
382 			{
383 				.binding = 1,
384 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385 				.descriptorCount = 1,
386 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
387 				.pImmutableSamplers = NULL
388 			},
389 		}
390 	};
391 
392 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
393 						&ds_create_info,
394 						&device->meta_state.alloc,
395 						&device->meta_state.btoi.img_ds_layout);
396 	if (result != VK_SUCCESS)
397 		goto fail;
398 
399 
400 	VkPipelineLayoutCreateInfo pl_create_info = {
401 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
402 		.setLayoutCount = 1,
403 		.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
404 		.pushConstantRangeCount = 1,
405 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
406 	};
407 
408 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
409 					  &pl_create_info,
410 					  &device->meta_state.alloc,
411 					  &device->meta_state.btoi.img_p_layout);
412 	if (result != VK_SUCCESS)
413 		goto fail;
414 
415 	/* compute shader */
416 
417 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
418 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
419 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
420 		.module = radv_shader_module_to_handle(&cs),
421 		.pName = "main",
422 		.pSpecializationInfo = NULL,
423 	};
424 
425 	VkComputePipelineCreateInfo vk_pipeline_info = {
426 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
427 		.stage = pipeline_shader_stage,
428 		.flags = 0,
429 		.layout = device->meta_state.btoi.img_p_layout,
430 	};
431 
432 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
433 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
434 					     1, &vk_pipeline_info, NULL,
435 					     &device->meta_state.btoi.pipeline);
436 	if (result != VK_SUCCESS)
437 		goto fail;
438 
439 	if (device->physical_device->rad_info.chip_class >= GFX9) {
440 		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
441 			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
442 			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
443 			.module = radv_shader_module_to_handle(&cs_3d),
444 			.pName = "main",
445 			.pSpecializationInfo = NULL,
446 		};
447 
448 		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
449 			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
450 			.stage = pipeline_shader_stage_3d,
451 			.flags = 0,
452 			.layout = device->meta_state.btoi.img_p_layout,
453 		};
454 
455 		result = radv_CreateComputePipelines(radv_device_to_handle(device),
456 						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
457 						     1, &vk_pipeline_info_3d, NULL,
458 						     &device->meta_state.btoi.pipeline_3d);
459 		ralloc_free(cs_3d.nir);
460 	}
461 	ralloc_free(cs.nir);
462 
463 	return VK_SUCCESS;
464 fail:
465 	ralloc_free(cs_3d.nir);
466 	ralloc_free(cs.nir);
467 	return result;
468 }
469 
470 static void
radv_device_finish_meta_btoi_state(struct radv_device * device)471 radv_device_finish_meta_btoi_state(struct radv_device *device)
472 {
473 	struct radv_meta_state *state = &device->meta_state;
474 
475 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
476 				   state->btoi.img_p_layout, &state->alloc);
477 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
478 				        state->btoi.img_ds_layout,
479 					&state->alloc);
480 	radv_DestroyPipeline(radv_device_to_handle(device),
481 			     state->btoi.pipeline, &state->alloc);
482 	radv_DestroyPipeline(radv_device_to_handle(device),
483 			     state->btoi.pipeline_3d, &state->alloc);
484 }
485 
486 /* Buffer to image - special path for R32G32B32 */
487 static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device * dev)488 build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
489 {
490 	nir_builder b;
491 	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
492 							     false,
493 							     false,
494 							     GLSL_TYPE_FLOAT);
495 	const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
496 							   false,
497 							   GLSL_TYPE_FLOAT);
498 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
499 	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
500 	b.shader->info.cs.local_size[0] = 16;
501 	b.shader->info.cs.local_size[1] = 16;
502 	b.shader->info.cs.local_size[2] = 1;
503 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
504 						      buf_type, "s_tex");
505 	input_img->data.descriptor_set = 0;
506 	input_img->data.binding = 0;
507 
508 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
509 						       img_type, "out_img");
510 	output_img->data.descriptor_set = 0;
511 	output_img->data.binding = 1;
512 
513 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
514 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
515 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
516 						b.shader->info.cs.local_size[0],
517 						b.shader->info.cs.local_size[1],
518 						b.shader->info.cs.local_size[2], 0);
519 
520 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
521 
522 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
523 	nir_intrinsic_set_base(offset, 0);
524 	nir_intrinsic_set_range(offset, 16);
525 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
526 	offset->num_components = 2;
527 	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
528 	nir_builder_instr_insert(&b, &offset->instr);
529 
530 	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
531 	nir_intrinsic_set_base(pitch, 0);
532 	nir_intrinsic_set_range(pitch, 16);
533 	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
534 	pitch->num_components = 1;
535 	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
536 	nir_builder_instr_insert(&b, &pitch->instr);
537 
538 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
539 	nir_intrinsic_set_base(stride, 0);
540 	nir_intrinsic_set_range(stride, 16);
541 	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
542 	stride->num_components = 1;
543 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
544 	nir_builder_instr_insert(&b, &stride->instr);
545 
546 	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
547 	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
548 
549 	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
550 	tmp = nir_iadd(&b, tmp, pos_x);
551 
552 	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
553 
554 	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
555 
556 	nir_ssa_def *global_pos =
557 		nir_iadd(&b,
558 			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
559 			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
560 
561 	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
562 
563 	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
564 	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
565 	tex->op = nir_texop_txf;
566 	tex->src[0].src_type = nir_tex_src_coord;
567 	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
568 	tex->src[1].src_type = nir_tex_src_lod;
569 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
570 	tex->src[2].src_type = nir_tex_src_texture_deref;
571 	tex->src[2].src = nir_src_for_ssa(input_img_deref);
572 	tex->dest_type = nir_type_float;
573 	tex->is_array = false;
574 	tex->coord_components = 1;
575 	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
576 	nir_builder_instr_insert(&b, &tex->instr);
577 
578 	nir_ssa_def *outval = &tex->dest.ssa;
579 
580 	for (int chan = 0; chan < 3; chan++) {
581 		nir_ssa_def *local_pos =
582                        nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
583 
584                nir_ssa_def *coord =
585                        nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
586 
587 		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
588 		store->num_components = 1;
589 		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
590 		store->src[1] = nir_src_for_ssa(coord);
591 		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
592 		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
593 		store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
594 		nir_builder_instr_insert(&b, &store->instr);
595 	}
596 
597 	return b.shader;
598 }
599 
600 static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device * device)601 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
602 {
603 	VkResult result;
604 	struct radv_shader_module cs = { .nir = NULL };
605 
606 	cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
607 
608 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
609 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
610 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
611 		.bindingCount = 2,
612 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
613 			{
614 				.binding = 0,
615 				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
616 				.descriptorCount = 1,
617 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
618 				.pImmutableSamplers = NULL
619 			},
620 			{
621 				.binding = 1,
622 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
623 				.descriptorCount = 1,
624 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
625 				.pImmutableSamplers = NULL
626 			},
627 		}
628 	};
629 
630 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
631 						&ds_create_info,
632 						&device->meta_state.alloc,
633 						&device->meta_state.btoi_r32g32b32.img_ds_layout);
634 	if (result != VK_SUCCESS)
635 		goto fail;
636 
637 
638 	VkPipelineLayoutCreateInfo pl_create_info = {
639 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
640 		.setLayoutCount = 1,
641 		.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
642 		.pushConstantRangeCount = 1,
643 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
644 	};
645 
646 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
647 					  &pl_create_info,
648 					  &device->meta_state.alloc,
649 					  &device->meta_state.btoi_r32g32b32.img_p_layout);
650 	if (result != VK_SUCCESS)
651 		goto fail;
652 
653 	/* compute shader */
654 
655 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
656 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
657 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
658 		.module = radv_shader_module_to_handle(&cs),
659 		.pName = "main",
660 		.pSpecializationInfo = NULL,
661 	};
662 
663 	VkComputePipelineCreateInfo vk_pipeline_info = {
664 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
665 		.stage = pipeline_shader_stage,
666 		.flags = 0,
667 		.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
668 	};
669 
670 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
671 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
672 					     1, &vk_pipeline_info, NULL,
673 					     &device->meta_state.btoi_r32g32b32.pipeline);
674 
675 fail:
676 	ralloc_free(cs.nir);
677 	return result;
678 }
679 
680 static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device * device)681 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
682 {
683 	struct radv_meta_state *state = &device->meta_state;
684 
685 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
686 				   state->btoi_r32g32b32.img_p_layout, &state->alloc);
687 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
688 				        state->btoi_r32g32b32.img_ds_layout,
689 					&state->alloc);
690 	radv_DestroyPipeline(radv_device_to_handle(device),
691 			     state->btoi_r32g32b32.pipeline, &state->alloc);
692 }
693 
694 static nir_shader *
build_nir_itoi_compute_shader(struct radv_device * dev,bool is_3d)695 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
696 {
697 	nir_builder b;
698 	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
699 	const struct glsl_type *buf_type = glsl_sampler_type(dim,
700 							     false,
701 							     false,
702 							     GLSL_TYPE_FLOAT);
703 	const struct glsl_type *img_type = glsl_image_type(dim,
704 							   false,
705 							   GLSL_TYPE_FLOAT);
706 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
707 	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
708 	b.shader->info.cs.local_size[0] = 16;
709 	b.shader->info.cs.local_size[1] = 16;
710 	b.shader->info.cs.local_size[2] = 1;
711 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
712 						      buf_type, "s_tex");
713 	input_img->data.descriptor_set = 0;
714 	input_img->data.binding = 0;
715 
716 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
717 						       img_type, "out_img");
718 	output_img->data.descriptor_set = 0;
719 	output_img->data.binding = 1;
720 
721 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
722 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
723 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
724 						b.shader->info.cs.local_size[0],
725 						b.shader->info.cs.local_size[1],
726 						b.shader->info.cs.local_size[2], 0);
727 
728 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
729 
730 	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
731 	nir_intrinsic_set_base(src_offset, 0);
732 	nir_intrinsic_set_range(src_offset, 24);
733 	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
734 	src_offset->num_components = is_3d ? 3 : 2;
735 	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
736 	nir_builder_instr_insert(&b, &src_offset->instr);
737 
738 	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
739 	nir_intrinsic_set_base(dst_offset, 0);
740 	nir_intrinsic_set_range(dst_offset, 24);
741 	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
742 	dst_offset->num_components = is_3d ? 3 : 2;
743 	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
744 	nir_builder_instr_insert(&b, &dst_offset->instr);
745 
746 	nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
747 	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
748 
749 	nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
750 
751 	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
752 	tex->sampler_dim = dim;
753 	tex->op = nir_texop_txf;
754 	tex->src[0].src_type = nir_tex_src_coord;
755 	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
756 	tex->src[1].src_type = nir_tex_src_lod;
757 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
758 	tex->src[2].src_type = nir_tex_src_texture_deref;
759 	tex->src[2].src = nir_src_for_ssa(input_img_deref);
760 	tex->dest_type = nir_type_float;
761 	tex->is_array = false;
762 	tex->coord_components = is_3d ? 3 : 2;
763 
764 	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
765 	nir_builder_instr_insert(&b, &tex->instr);
766 
767 	nir_ssa_def *outval = &tex->dest.ssa;
768 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
769 	store->num_components = 4;
770 	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
771 	store->src[1] = nir_src_for_ssa(dst_coord);
772 	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
773 	store->src[3] = nir_src_for_ssa(outval);
774 	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
775 
776 	nir_builder_instr_insert(&b, &store->instr);
777 	return b.shader;
778 }
779 
780 /* image to image - don't write use image accessors */
781 static VkResult
radv_device_init_meta_itoi_state(struct radv_device * device)782 radv_device_init_meta_itoi_state(struct radv_device *device)
783 {
784 	VkResult result;
785 	struct radv_shader_module cs = { .nir = NULL };
786 	struct radv_shader_module cs_3d = { .nir = NULL };
787 	cs.nir = build_nir_itoi_compute_shader(device, false);
788 	if (device->physical_device->rad_info.chip_class >= GFX9)
789 		cs_3d.nir = build_nir_itoi_compute_shader(device, true);
790 	/*
791 	 * two descriptors one for the image being sampled
792 	 * one for the buffer being written.
793 	 */
794 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
795 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
796 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
797 		.bindingCount = 2,
798 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
799 			{
800 				.binding = 0,
801 				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
802 				.descriptorCount = 1,
803 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
804 				.pImmutableSamplers = NULL
805 			},
806 			{
807 				.binding = 1,
808 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
809 				.descriptorCount = 1,
810 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
811 				.pImmutableSamplers = NULL
812 			},
813 		}
814 	};
815 
816 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
817 						&ds_create_info,
818 						&device->meta_state.alloc,
819 						&device->meta_state.itoi.img_ds_layout);
820 	if (result != VK_SUCCESS)
821 		goto fail;
822 
823 
824 	VkPipelineLayoutCreateInfo pl_create_info = {
825 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
826 		.setLayoutCount = 1,
827 		.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
828 		.pushConstantRangeCount = 1,
829 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
830 	};
831 
832 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
833 					  &pl_create_info,
834 					  &device->meta_state.alloc,
835 					  &device->meta_state.itoi.img_p_layout);
836 	if (result != VK_SUCCESS)
837 		goto fail;
838 
839 	/* compute shader */
840 
841 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
842 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
843 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
844 		.module = radv_shader_module_to_handle(&cs),
845 		.pName = "main",
846 		.pSpecializationInfo = NULL,
847 	};
848 
849 	VkComputePipelineCreateInfo vk_pipeline_info = {
850 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
851 		.stage = pipeline_shader_stage,
852 		.flags = 0,
853 		.layout = device->meta_state.itoi.img_p_layout,
854 	};
855 
856 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
857 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
858 					     1, &vk_pipeline_info, NULL,
859 					     &device->meta_state.itoi.pipeline);
860 	if (result != VK_SUCCESS)
861 		goto fail;
862 
863 	if (device->physical_device->rad_info.chip_class >= GFX9) {
864 		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
865 			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
866 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
867 			.module = radv_shader_module_to_handle(&cs_3d),
868 			.pName = "main",
869 			.pSpecializationInfo = NULL,
870 		};
871 
872 		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
873 			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
874 			.stage = pipeline_shader_stage_3d,
875 			.flags = 0,
876 			.layout = device->meta_state.itoi.img_p_layout,
877 		};
878 
879 		result = radv_CreateComputePipelines(radv_device_to_handle(device),
880 						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
881 						     1, &vk_pipeline_info_3d, NULL,
882 						     &device->meta_state.itoi.pipeline_3d);
883 
884 		ralloc_free(cs_3d.nir);
885 	}
886 	ralloc_free(cs.nir);
887 
888 	return VK_SUCCESS;
889 fail:
890 	ralloc_free(cs.nir);
891 	ralloc_free(cs_3d.nir);
892 	return result;
893 }
894 
895 static void
radv_device_finish_meta_itoi_state(struct radv_device * device)896 radv_device_finish_meta_itoi_state(struct radv_device *device)
897 {
898 	struct radv_meta_state *state = &device->meta_state;
899 
900 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
901 				   state->itoi.img_p_layout, &state->alloc);
902 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
903 				        state->itoi.img_ds_layout,
904 					&state->alloc);
905 	radv_DestroyPipeline(radv_device_to_handle(device),
906 			     state->itoi.pipeline, &state->alloc);
907 	if (device->physical_device->rad_info.chip_class >= GFX9)
908 		radv_DestroyPipeline(radv_device_to_handle(device),
909 				     state->itoi.pipeline_3d, &state->alloc);
910 }
911 
912 static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device * dev)913 build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
914 {
915 	nir_builder b;
916 	const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
917 							 false,
918 							 false,
919 							 GLSL_TYPE_FLOAT);
920 	const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
921 							   false,
922 							   GLSL_TYPE_FLOAT);
923 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
924 	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
925 	b.shader->info.cs.local_size[0] = 16;
926 	b.shader->info.cs.local_size[1] = 16;
927 	b.shader->info.cs.local_size[2] = 1;
928 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
929 						      type, "input_img");
930 	input_img->data.descriptor_set = 0;
931 	input_img->data.binding = 0;
932 
933 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
934 						      img_type, "output_img");
935 	output_img->data.descriptor_set = 0;
936 	output_img->data.binding = 1;
937 
938 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
939 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
940 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
941 						b.shader->info.cs.local_size[0],
942 						b.shader->info.cs.local_size[1],
943 						b.shader->info.cs.local_size[2], 0);
944 
945 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
946 
947 	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
948 	nir_intrinsic_set_base(src_offset, 0);
949 	nir_intrinsic_set_range(src_offset, 24);
950 	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
951 	src_offset->num_components = 3;
952 	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
953 	nir_builder_instr_insert(&b, &src_offset->instr);
954 
955 	nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);
956 
957 	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
958 	nir_intrinsic_set_base(dst_offset, 0);
959 	nir_intrinsic_set_range(dst_offset, 24);
960 	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
961 	dst_offset->num_components = 3;
962 	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
963 	nir_builder_instr_insert(&b, &dst_offset->instr);
964 
965 	nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);
966 
967 	nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
968 	nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
969 
970 	nir_ssa_def *src_global_pos =
971 		nir_iadd(&b,
972 			 nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
973 			 nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
974 
975 	nir_ssa_def *dst_global_pos =
976 		nir_iadd(&b,
977 			 nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
978 			 nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
979 
980 	for (int chan = 0; chan < 3; chan++) {
981 		/* src */
982 		nir_ssa_def *src_local_pos =
983 			nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
984 
985 		nir_ssa_def *src_coord =
986 			nir_vec4(&b, src_local_pos, src_local_pos,
987 				 src_local_pos, src_local_pos);
988 
989 		nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
990 
991 		nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
992 		tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
993 		tex->op = nir_texop_txf;
994 		tex->src[0].src_type = nir_tex_src_coord;
995 		tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
996 		tex->src[1].src_type = nir_tex_src_lod;
997 		tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
998 		tex->src[2].src_type = nir_tex_src_texture_deref;
999 		tex->src[2].src = nir_src_for_ssa(input_img_deref);
1000 		tex->dest_type = nir_type_float;
1001 		tex->is_array = false;
1002 		tex->coord_components = 1;
1003 		nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
1004 		nir_builder_instr_insert(&b, &tex->instr);
1005 
1006 		nir_ssa_def *outval = &tex->dest.ssa;
1007 
1008 		/* dst */
1009 		nir_ssa_def *dst_local_pos =
1010 			nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
1011 
1012 		nir_ssa_def *dst_coord =
1013 			nir_vec4(&b, dst_local_pos, dst_local_pos,
1014 				 dst_local_pos, dst_local_pos);
1015 
1016 		nir_intrinsic_instr *store =
1017 			nir_intrinsic_instr_create(b.shader,
1018 						   nir_intrinsic_image_deref_store);
1019 		store->num_components = 1;
1020 		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1021 		store->src[1] = nir_src_for_ssa(dst_coord);
1022 		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1023 		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
1024 		store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
1025 		nir_builder_instr_insert(&b, &store->instr);
1026 	}
1027 
1028 	return b.shader;
1029 }
1030 
1031 /* Image to image - special path for R32G32B32 */
1032 static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device * device)1033 radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
1034 {
1035 	VkResult result;
1036 	struct radv_shader_module cs = { .nir = NULL };
1037 
1038 	cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);
1039 
1040 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
1041 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1042 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1043 		.bindingCount = 2,
1044 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1045 			{
1046 				.binding = 0,
1047 				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1048 				.descriptorCount = 1,
1049 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1050 				.pImmutableSamplers = NULL
1051 			},
1052 			{
1053 				.binding = 1,
1054 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1055 				.descriptorCount = 1,
1056 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1057 				.pImmutableSamplers = NULL
1058 			},
1059 		}
1060 	};
1061 
1062 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1063 						&ds_create_info,
1064 						&device->meta_state.alloc,
1065 						&device->meta_state.itoi_r32g32b32.img_ds_layout);
1066 	if (result != VK_SUCCESS)
1067 		goto fail;
1068 
1069 
1070 	VkPipelineLayoutCreateInfo pl_create_info = {
1071 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1072 		.setLayoutCount = 1,
1073 		.pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
1074 		.pushConstantRangeCount = 1,
1075 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
1076 	};
1077 
1078 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1079 					  &pl_create_info,
1080 					  &device->meta_state.alloc,
1081 					  &device->meta_state.itoi_r32g32b32.img_p_layout);
1082 	if (result != VK_SUCCESS)
1083 		goto fail;
1084 
1085 	/* compute shader */
1086 
1087 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1088 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1089 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1090 		.module = radv_shader_module_to_handle(&cs),
1091 		.pName = "main",
1092 		.pSpecializationInfo = NULL,
1093 	};
1094 
1095 	VkComputePipelineCreateInfo vk_pipeline_info = {
1096 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1097 		.stage = pipeline_shader_stage,
1098 		.flags = 0,
1099 		.layout = device->meta_state.itoi_r32g32b32.img_p_layout,
1100 	};
1101 
1102 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1103 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
1104 					     1, &vk_pipeline_info, NULL,
1105 					     &device->meta_state.itoi_r32g32b32.pipeline);
1106 
1107 fail:
1108 	ralloc_free(cs.nir);
1109 	return result;
1110 }
1111 
1112 static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device * device)1113 radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
1114 {
1115 	struct radv_meta_state *state = &device->meta_state;
1116 
1117 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
1118 				   state->itoi_r32g32b32.img_p_layout, &state->alloc);
1119 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1120 				        state->itoi_r32g32b32.img_ds_layout,
1121 					&state->alloc);
1122 	radv_DestroyPipeline(radv_device_to_handle(device),
1123 			     state->itoi_r32g32b32.pipeline, &state->alloc);
1124 }
1125 
1126 static nir_shader *
build_nir_cleari_compute_shader(struct radv_device * dev,bool is_3d)1127 build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
1128 {
1129 	nir_builder b;
1130 	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
1131 	const struct glsl_type *img_type = glsl_image_type(dim,
1132 							   false,
1133 							   GLSL_TYPE_FLOAT);
1134 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1135 	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
1136 	b.shader->info.cs.local_size[0] = 16;
1137 	b.shader->info.cs.local_size[1] = 16;
1138 	b.shader->info.cs.local_size[2] = 1;
1139 
1140 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
1141 						       img_type, "out_img");
1142 	output_img->data.descriptor_set = 0;
1143 	output_img->data.binding = 0;
1144 
1145 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1146 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
1147 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
1148 						b.shader->info.cs.local_size[0],
1149 						b.shader->info.cs.local_size[1],
1150 						b.shader->info.cs.local_size[2], 0);
1151 
1152 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1153 
1154 	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1155 	nir_intrinsic_set_base(clear_val, 0);
1156 	nir_intrinsic_set_range(clear_val, 20);
1157 	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1158 	clear_val->num_components = 4;
1159 	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
1160 	nir_builder_instr_insert(&b, &clear_val->instr);
1161 
1162 	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1163 	nir_intrinsic_set_base(layer, 0);
1164 	nir_intrinsic_set_range(layer, 20);
1165 	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
1166 	layer->num_components = 1;
1167 	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
1168 	nir_builder_instr_insert(&b, &layer->instr);
1169 
1170 	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
1171 
1172 	nir_ssa_def *comps[4];
1173 	comps[0] = nir_channel(&b, global_id, 0);
1174 	comps[1] = nir_channel(&b, global_id, 1);
1175 	comps[2] = global_z;
1176 	comps[3] = nir_imm_int(&b, 0);
1177 	global_id = nir_vec(&b, comps, 4);
1178 
1179 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
1180 	store->num_components = 4;
1181 	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1182 	store->src[1] = nir_src_for_ssa(global_id);
1183 	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1184 	store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);
1185 	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
1186 
1187 	nir_builder_instr_insert(&b, &store->instr);
1188 	return b.shader;
1189 }
1190 
1191 static VkResult
radv_device_init_meta_cleari_state(struct radv_device * device)1192 radv_device_init_meta_cleari_state(struct radv_device *device)
1193 {
1194 	VkResult result;
1195 	struct radv_shader_module cs = { .nir = NULL };
1196 	struct radv_shader_module cs_3d = { .nir = NULL };
1197 	cs.nir = build_nir_cleari_compute_shader(device, false);
1198 	if (device->physical_device->rad_info.chip_class >= GFX9)
1199 		cs_3d.nir = build_nir_cleari_compute_shader(device, true);
1200 
1201 	/*
1202 	 * two descriptors one for the image being sampled
1203 	 * one for the buffer being written.
1204 	 */
1205 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
1206 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1207 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1208 		.bindingCount = 1,
1209 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1210 			{
1211 				.binding = 0,
1212 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1213 				.descriptorCount = 1,
1214 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1215 				.pImmutableSamplers = NULL
1216 			},
1217 		}
1218 	};
1219 
1220 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1221 						&ds_create_info,
1222 						&device->meta_state.alloc,
1223 						&device->meta_state.cleari.img_ds_layout);
1224 	if (result != VK_SUCCESS)
1225 		goto fail;
1226 
1227 
1228 	VkPipelineLayoutCreateInfo pl_create_info = {
1229 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1230 		.setLayoutCount = 1,
1231 		.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
1232 		.pushConstantRangeCount = 1,
1233 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
1234 	};
1235 
1236 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1237 					  &pl_create_info,
1238 					  &device->meta_state.alloc,
1239 					  &device->meta_state.cleari.img_p_layout);
1240 	if (result != VK_SUCCESS)
1241 		goto fail;
1242 
1243 	/* compute shader */
1244 
1245 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1246 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1247 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1248 		.module = radv_shader_module_to_handle(&cs),
1249 		.pName = "main",
1250 		.pSpecializationInfo = NULL,
1251 	};
1252 
1253 	VkComputePipelineCreateInfo vk_pipeline_info = {
1254 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1255 		.stage = pipeline_shader_stage,
1256 		.flags = 0,
1257 		.layout = device->meta_state.cleari.img_p_layout,
1258 	};
1259 
1260 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1261 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
1262 					     1, &vk_pipeline_info, NULL,
1263 					     &device->meta_state.cleari.pipeline);
1264 	if (result != VK_SUCCESS)
1265 		goto fail;
1266 
1267 
1268 	if (device->physical_device->rad_info.chip_class >= GFX9) {
1269 		/* compute shader */
1270 		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1271 			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1272 			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1273 			.module = radv_shader_module_to_handle(&cs_3d),
1274 			.pName = "main",
1275 			.pSpecializationInfo = NULL,
1276 		};
1277 
1278 		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1279 			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1280 			.stage = pipeline_shader_stage_3d,
1281 			.flags = 0,
1282 			.layout = device->meta_state.cleari.img_p_layout,
1283 		};
1284 
1285 		result = radv_CreateComputePipelines(radv_device_to_handle(device),
1286 						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
1287 						     1, &vk_pipeline_info_3d, NULL,
1288 						     &device->meta_state.cleari.pipeline_3d);
1289 		if (result != VK_SUCCESS)
1290 			goto fail;
1291 
1292 		ralloc_free(cs_3d.nir);
1293 	}
1294 	ralloc_free(cs.nir);
1295 	return VK_SUCCESS;
1296 fail:
1297 	ralloc_free(cs.nir);
1298 	ralloc_free(cs_3d.nir);
1299 	return result;
1300 }
1301 
1302 static void
radv_device_finish_meta_cleari_state(struct radv_device * device)1303 radv_device_finish_meta_cleari_state(struct radv_device *device)
1304 {
1305 	struct radv_meta_state *state = &device->meta_state;
1306 
1307 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
1308 				   state->cleari.img_p_layout, &state->alloc);
1309 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1310 				        state->cleari.img_ds_layout,
1311 					&state->alloc);
1312 	radv_DestroyPipeline(radv_device_to_handle(device),
1313 			     state->cleari.pipeline, &state->alloc);
1314 	radv_DestroyPipeline(radv_device_to_handle(device),
1315 			     state->cleari.pipeline_3d, &state->alloc);
1316 }
1317 
1318 /* Special path for clearing R32G32B32 images using a compute shader. */
1319 static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device * dev)1320 build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
1321 {
1322 	nir_builder b;
1323 	const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
1324 							   false,
1325 							   GLSL_TYPE_FLOAT);
1326 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1327 	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
1328 	b.shader->info.cs.local_size[0] = 16;
1329 	b.shader->info.cs.local_size[1] = 16;
1330 	b.shader->info.cs.local_size[2] = 1;
1331 
1332 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
1333 						       img_type, "out_img");
1334 	output_img->data.descriptor_set = 0;
1335 	output_img->data.binding = 0;
1336 
1337 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1338 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
1339 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
1340 						b.shader->info.cs.local_size[0],
1341 						b.shader->info.cs.local_size[1],
1342 						b.shader->info.cs.local_size[2], 0);
1343 
1344 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1345 
1346 	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1347 	nir_intrinsic_set_base(clear_val, 0);
1348 	nir_intrinsic_set_range(clear_val, 16);
1349 	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1350 	clear_val->num_components = 3;
1351 	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
1352 	nir_builder_instr_insert(&b, &clear_val->instr);
1353 
1354 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1355 	nir_intrinsic_set_base(stride, 0);
1356 	nir_intrinsic_set_range(stride, 16);
1357 	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
1358 	stride->num_components = 1;
1359 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
1360 	nir_builder_instr_insert(&b, &stride->instr);
1361 
1362 	nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
1363 	nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
1364 
1365 	nir_ssa_def *global_pos =
1366 		nir_iadd(&b,
1367 			 nir_imul(&b, global_y, &stride->dest.ssa),
1368 			 nir_imul(&b, global_x, nir_imm_int(&b, 3)));
1369 
1370 	for (unsigned chan = 0; chan < 3; chan++) {
1371 		nir_ssa_def *local_pos =
1372 			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
1373 
1374 		nir_ssa_def *coord =
1375 			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
1376 
1377 		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
1378 		store->num_components = 1;
1379 		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1380 		store->src[1] = nir_src_for_ssa(coord);
1381 		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1382 		store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
1383 		store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
1384 		nir_builder_instr_insert(&b, &store->instr);
1385 	}
1386 
1387 	return b.shader;
1388 }
1389 
1390 static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device * device)1391 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1392 {
1393 	VkResult result;
1394 	struct radv_shader_module cs = { .nir = NULL };
1395 
1396 	cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
1397 
1398 	VkDescriptorSetLayoutCreateInfo ds_create_info = {
1399 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1400 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1401 		.bindingCount = 1,
1402 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1403 			{
1404 				.binding = 0,
1405 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1406 				.descriptorCount = 1,
1407 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1408 				.pImmutableSamplers = NULL
1409 			},
1410 		}
1411 	};
1412 
1413 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1414 						&ds_create_info,
1415 						&device->meta_state.alloc,
1416 						&device->meta_state.cleari_r32g32b32.img_ds_layout);
1417 	if (result != VK_SUCCESS)
1418 		goto fail;
1419 
1420 	VkPipelineLayoutCreateInfo pl_create_info = {
1421 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1422 		.setLayoutCount = 1,
1423 		.pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1424 		.pushConstantRangeCount = 1,
1425 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1426 	};
1427 
1428 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1429 					   &pl_create_info,
1430 					   &device->meta_state.alloc,
1431 					   &device->meta_state.cleari_r32g32b32.img_p_layout);
1432 	if (result != VK_SUCCESS)
1433 		goto fail;
1434 
1435 	/* compute shader */
1436 	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1437 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1438 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1439 		.module = radv_shader_module_to_handle(&cs),
1440 		.pName = "main",
1441 		.pSpecializationInfo = NULL,
1442 	};
1443 
1444 	VkComputePipelineCreateInfo vk_pipeline_info = {
1445 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1446 		.stage = pipeline_shader_stage,
1447 		.flags = 0,
1448 		.layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1449 	};
1450 
1451 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1452 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
1453 					     1, &vk_pipeline_info, NULL,
1454 					     &device->meta_state.cleari_r32g32b32.pipeline);
1455 
1456 fail:
1457 	ralloc_free(cs.nir);
1458 	return result;
1459 }
1460 
1461 static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device * device)1462 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1463 {
1464 	struct radv_meta_state *state = &device->meta_state;
1465 
1466 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
1467 				   state->cleari_r32g32b32.img_p_layout,
1468 				   &state->alloc);
1469 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1470 				        state->cleari_r32g32b32.img_ds_layout,
1471 					&state->alloc);
1472 	radv_DestroyPipeline(radv_device_to_handle(device),
1473 			     state->cleari_r32g32b32.pipeline, &state->alloc);
1474 }
1475 
1476 void
radv_device_finish_meta_bufimage_state(struct radv_device * device)1477 radv_device_finish_meta_bufimage_state(struct radv_device *device)
1478 {
1479 	radv_device_finish_meta_itob_state(device);
1480 	radv_device_finish_meta_btoi_state(device);
1481 	radv_device_finish_meta_btoi_r32g32b32_state(device);
1482 	radv_device_finish_meta_itoi_state(device);
1483 	radv_device_finish_meta_itoi_r32g32b32_state(device);
1484 	radv_device_finish_meta_cleari_state(device);
1485 	radv_device_finish_meta_cleari_r32g32b32_state(device);
1486 }
1487 
1488 VkResult
radv_device_init_meta_bufimage_state(struct radv_device * device)1489 radv_device_init_meta_bufimage_state(struct radv_device *device)
1490 {
1491 	VkResult result;
1492 
1493 	result = radv_device_init_meta_itob_state(device);
1494 	if (result != VK_SUCCESS)
1495 		goto fail_itob;
1496 
1497 	result = radv_device_init_meta_btoi_state(device);
1498 	if (result != VK_SUCCESS)
1499 		goto fail_btoi;
1500 
1501 	result = radv_device_init_meta_btoi_r32g32b32_state(device);
1502 	if (result != VK_SUCCESS)
1503 		goto fail_btoi_r32g32b32;
1504 
1505 	result = radv_device_init_meta_itoi_state(device);
1506 	if (result != VK_SUCCESS)
1507 		goto fail_itoi;
1508 
1509 	result = radv_device_init_meta_itoi_r32g32b32_state(device);
1510 	if (result != VK_SUCCESS)
1511 		goto fail_itoi_r32g32b32;
1512 
1513 	result = radv_device_init_meta_cleari_state(device);
1514 	if (result != VK_SUCCESS)
1515 		goto fail_cleari;
1516 
1517 	result = radv_device_init_meta_cleari_r32g32b32_state(device);
1518 	if (result != VK_SUCCESS)
1519 		goto fail_cleari_r32g32b32;
1520 
1521 	return VK_SUCCESS;
1522 fail_cleari_r32g32b32:
1523 	radv_device_finish_meta_cleari_r32g32b32_state(device);
1524 fail_cleari:
1525 	radv_device_finish_meta_cleari_state(device);
1526 fail_itoi_r32g32b32:
1527 	radv_device_finish_meta_itoi_r32g32b32_state(device);
1528 fail_itoi:
1529 	radv_device_finish_meta_itoi_state(device);
1530 fail_btoi_r32g32b32:
1531 	radv_device_finish_meta_btoi_r32g32b32_state(device);
1532 fail_btoi:
1533 	radv_device_finish_meta_btoi_state(device);
1534 fail_itob:
1535 	radv_device_finish_meta_itob_state(device);
1536 	return result;
1537 }
1538 
1539 static void
create_iview(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf,struct radv_image_view * iview)1540 create_iview(struct radv_cmd_buffer *cmd_buffer,
1541              struct radv_meta_blit2d_surf *surf,
1542              struct radv_image_view *iview)
1543 {
1544 	VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1545 		radv_meta_get_view_type(surf->image);
1546 	radv_image_view_init(iview, cmd_buffer->device,
1547 			     &(VkImageViewCreateInfo) {
1548 				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1549 					     .image = radv_image_to_handle(surf->image),
1550 					     .viewType = view_type,
1551 					     .format = surf->format,
1552 					     .subresourceRange = {
1553 					     .aspectMask = surf->aspect_mask,
1554 					     .baseMipLevel = surf->level,
1555 					     .levelCount = 1,
1556 					     .baseArrayLayer = surf->layer,
1557 					     .layerCount = 1
1558 				     },
1559 			     }, NULL);
1560 }
1561 
1562 static void
create_bview(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * buffer,unsigned offset,VkFormat format,struct radv_buffer_view * bview)1563 create_bview(struct radv_cmd_buffer *cmd_buffer,
1564 	     struct radv_buffer *buffer,
1565 	     unsigned offset,
1566 	     VkFormat format,
1567 	     struct radv_buffer_view *bview)
1568 {
1569 	radv_buffer_view_init(bview, cmd_buffer->device,
1570 			      &(VkBufferViewCreateInfo) {
1571 				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1572 				      .flags = 0,
1573 				      .buffer = radv_buffer_to_handle(buffer),
1574 				      .format = format,
1575 				      .offset = offset,
1576 				      .range = VK_WHOLE_SIZE,
1577 			      });
1578 
1579 }
1580 
1581 static void
create_buffer_from_image(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf,VkBufferUsageFlagBits usage,VkBuffer * buffer)1582 create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
1583 			 struct radv_meta_blit2d_surf *surf,
1584 			 VkBufferUsageFlagBits usage,
1585 			 VkBuffer *buffer)
1586 {
1587 	struct radv_device *device = cmd_buffer->device;
1588 	struct radv_device_memory mem = { .bo = surf->image->bo };
1589 
1590 	radv_CreateBuffer(radv_device_to_handle(device),
1591 			  &(VkBufferCreateInfo) {
1592 				.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1593 				.flags = 0,
1594 				.size = surf->image->size,
1595 				.usage = usage,
1596 				.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1597 			  }, NULL, buffer);
1598 
1599 	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1600 			       (VkBindBufferMemoryInfo[]) {
1601 				    {
1602 					.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1603 					.buffer = *buffer,
1604 					.memory = radv_device_memory_to_handle(&mem),
1605 					.memoryOffset = surf->image->offset,
1606 				    }
1607 			       });
1608 }
1609 
1610 static void
create_bview_for_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * buffer,unsigned offset,VkFormat src_format,struct radv_buffer_view * bview)1611 create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1612 			   struct radv_buffer *buffer,
1613 			   unsigned offset,
1614 			   VkFormat src_format,
1615 			   struct radv_buffer_view *bview)
1616 {
1617 	VkFormat format;
1618 
1619 	switch (src_format) {
1620 	case VK_FORMAT_R32G32B32_UINT:
1621 		format = VK_FORMAT_R32_UINT;
1622 		break;
1623 	case VK_FORMAT_R32G32B32_SINT:
1624 		format = VK_FORMAT_R32_SINT;
1625 		break;
1626 	case VK_FORMAT_R32G32B32_SFLOAT:
1627 		format = VK_FORMAT_R32_SFLOAT;
1628 		break;
1629 	default:
1630 		unreachable("invalid R32G32B32 format");
1631 	}
1632 
1633 	radv_buffer_view_init(bview, cmd_buffer->device,
1634 			      &(VkBufferViewCreateInfo) {
1635 				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1636 				      .flags = 0,
1637 				      .buffer = radv_buffer_to_handle(buffer),
1638 				      .format = format,
1639 				      .offset = offset,
1640 				      .range = VK_WHOLE_SIZE,
1641 			      });
1642 }
1643 
1644 static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf)1645 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1646 			       struct radv_meta_blit2d_surf *surf)
1647 {
1648 	unsigned stride;
1649 
1650 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1651 		stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1652 	} else {
1653 		stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1654 	}
1655 
1656 	return stride;
1657 }
1658 
1659 static void
itob_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src,struct radv_buffer_view * dst)1660 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1661 		      struct radv_image_view *src,
1662 		      struct radv_buffer_view *dst)
1663 {
1664 	struct radv_device *device = cmd_buffer->device;
1665 
1666 	radv_meta_push_descriptor_set(cmd_buffer,
1667 				      VK_PIPELINE_BIND_POINT_COMPUTE,
1668 				      device->meta_state.itob.img_p_layout,
1669 				      0, /* set */
1670 				      2, /* descriptorWriteCount */
1671 				      (VkWriteDescriptorSet[]) {
1672 				              {
1673 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1674 				                      .dstBinding = 0,
1675 				                      .dstArrayElement = 0,
1676 				                      .descriptorCount = 1,
1677 				                      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1678 				                      .pImageInfo = (VkDescriptorImageInfo[]) {
1679 				                              {
1680 				                                      .sampler = VK_NULL_HANDLE,
1681 				                                      .imageView = radv_image_view_to_handle(src),
1682 				                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1683 				                              },
1684 				                      }
1685 				              },
1686 				              {
1687 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1688 				                      .dstBinding = 1,
1689 				                      .dstArrayElement = 0,
1690 				                      .descriptorCount = 1,
1691 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1692 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
1693 				              }
1694 				      });
1695 }
1696 
1697 void
radv_meta_image_to_buffer(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_buffer * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1698 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
1699 			  struct radv_meta_blit2d_surf *src,
1700 			  struct radv_meta_blit2d_buffer *dst,
1701 			  unsigned num_rects,
1702 			  struct radv_meta_blit2d_rect *rects)
1703 {
1704 	VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1705 	struct radv_device *device = cmd_buffer->device;
1706 	struct radv_image_view src_view;
1707 	struct radv_buffer_view dst_view;
1708 
1709 	create_iview(cmd_buffer, src, &src_view);
1710 	create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1711 	itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1712 
1713 	if (device->physical_device->rad_info.chip_class >= GFX9 &&
1714 	    src->image->type == VK_IMAGE_TYPE_3D)
1715 		pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1716 
1717 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1718 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1719 
1720 	for (unsigned r = 0; r < num_rects; ++r) {
1721 		unsigned push_constants[4] = {
1722 			rects[r].src_x,
1723 			rects[r].src_y,
1724 			src->layer,
1725 			dst->pitch
1726 		};
1727 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1728 				      device->meta_state.itob.img_p_layout,
1729 				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1730 				      push_constants);
1731 
1732 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1733 	}
1734 }
1735 
1736 static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_buffer_view * dst)1737 btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1738 				struct radv_buffer_view *src,
1739 				struct radv_buffer_view *dst)
1740 {
1741 	struct radv_device *device = cmd_buffer->device;
1742 
1743 	radv_meta_push_descriptor_set(cmd_buffer,
1744 				      VK_PIPELINE_BIND_POINT_COMPUTE,
1745 				      device->meta_state.btoi_r32g32b32.img_p_layout,
1746 				      0, /* set */
1747 				      2, /* descriptorWriteCount */
1748 				      (VkWriteDescriptorSet[]) {
1749 				              {
1750 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1751 				                      .dstBinding = 0,
1752 				                      .dstArrayElement = 0,
1753 				                      .descriptorCount = 1,
1754 				                      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1755 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
1756 				              },
1757 				              {
1758 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1759 				                      .dstBinding = 1,
1760 				                      .dstArrayElement = 0,
1761 				                      .descriptorCount = 1,
1762 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1763 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
1764 				              }
1765 				      });
1766 }
1767 
1768 static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1769 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1770 				       struct radv_meta_blit2d_buffer *src,
1771 				       struct radv_meta_blit2d_surf *dst,
1772 				       unsigned num_rects,
1773 				       struct radv_meta_blit2d_rect *rects)
1774 {
1775 	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1776 	struct radv_device *device = cmd_buffer->device;
1777 	struct radv_buffer_view src_view, dst_view;
1778 	unsigned dst_offset = 0;
1779 	unsigned stride;
1780 	VkBuffer buffer;
1781 
1782 	/* This special btoi path for R32G32B32 formats will write the linear
1783 	 * image as a buffer with the same underlying memory. The compute
1784 	 * shader will copy all components separately using a R32 format.
1785 	 */
1786 	create_buffer_from_image(cmd_buffer, dst,
1787 				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1788 				 &buffer);
1789 
1790 	create_bview(cmd_buffer, src->buffer, src->offset,
1791 		     src->format, &src_view);
1792 	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
1793 				   dst_offset, dst->format, &dst_view);
1794 	btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1795 
1796 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1797 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1798 
1799 	stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1800 
1801 	for (unsigned r = 0; r < num_rects; ++r) {
1802 		unsigned push_constants[4] = {
1803 			rects[r].dst_x,
1804 			rects[r].dst_y,
1805 			stride,
1806 			src->pitch,
1807 		};
1808 
1809 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1810 				      device->meta_state.btoi_r32g32b32.img_p_layout,
1811 				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1812 				      push_constants);
1813 
1814 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1815 	}
1816 
1817 	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1818 }
1819 
1820 static void
btoi_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_image_view * dst)1821 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1822 		      struct radv_buffer_view *src,
1823 		      struct radv_image_view *dst)
1824 {
1825 	struct radv_device *device = cmd_buffer->device;
1826 
1827 	radv_meta_push_descriptor_set(cmd_buffer,
1828 				      VK_PIPELINE_BIND_POINT_COMPUTE,
1829 				      device->meta_state.btoi.img_p_layout,
1830 				      0, /* set */
1831 				      2, /* descriptorWriteCount */
1832 				      (VkWriteDescriptorSet[]) {
1833 				              {
1834 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1835 				                      .dstBinding = 0,
1836 				                      .dstArrayElement = 0,
1837 				                      .descriptorCount = 1,
1838 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1839 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
1840 				              },
1841 				              {
1842 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1843 				                      .dstBinding = 1,
1844 				                      .dstArrayElement = 0,
1845 				                      .descriptorCount = 1,
1846 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1847 				                      .pImageInfo = (VkDescriptorImageInfo[]) {
1848 				                              {
1849 				                                      .sampler = VK_NULL_HANDLE,
1850 				                                      .imageView = radv_image_view_to_handle(dst),
1851 				                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1852 				                              },
1853 				                      }
1854 				              }
1855 				      });
1856 }
1857 
1858 void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1859 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1860 			     struct radv_meta_blit2d_buffer *src,
1861 			     struct radv_meta_blit2d_surf *dst,
1862 			     unsigned num_rects,
1863 			     struct radv_meta_blit2d_rect *rects)
1864 {
1865 	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1866 	struct radv_device *device = cmd_buffer->device;
1867 	struct radv_buffer_view src_view;
1868 	struct radv_image_view dst_view;
1869 
1870 	if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1871 	    dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1872 	    dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1873 		radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
1874 						       num_rects, rects);
1875 		return;
1876 	}
1877 
1878 	create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1879 	create_iview(cmd_buffer, dst, &dst_view);
1880 	btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1881 
1882 	if (device->physical_device->rad_info.chip_class >= GFX9 &&
1883 	    dst->image->type == VK_IMAGE_TYPE_3D)
1884 		pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1885 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1886 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1887 
1888 	for (unsigned r = 0; r < num_rects; ++r) {
1889 		unsigned push_constants[4] = {
1890 			rects[r].dst_x,
1891 			rects[r].dst_y,
1892 			dst->layer,
1893 			src->pitch,
1894 		};
1895 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1896 				      device->meta_state.btoi.img_p_layout,
1897 				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1898 				      push_constants);
1899 
1900 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1901 	}
1902 }
1903 
1904 static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_buffer_view * dst)1905 itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1906 				struct radv_buffer_view *src,
1907 				struct radv_buffer_view *dst)
1908 {
1909 	struct radv_device *device = cmd_buffer->device;
1910 
1911 	radv_meta_push_descriptor_set(cmd_buffer,
1912 				      VK_PIPELINE_BIND_POINT_COMPUTE,
1913 				      device->meta_state.itoi_r32g32b32.img_p_layout,
1914 				      0, /* set */
1915 				      2, /* descriptorWriteCount */
1916 				      (VkWriteDescriptorSet[]) {
1917 				              {
1918 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1919 				                      .dstBinding = 0,
1920 				                      .dstArrayElement = 0,
1921 				                      .descriptorCount = 1,
1922 				                      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1923 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
1924 				              },
1925 				              {
1926 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1927 				                      .dstBinding = 1,
1928 				                      .dstArrayElement = 0,
1929 				                      .descriptorCount = 1,
1930 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1931 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
1932 				              }
1933 				      });
1934 }
1935 
1936 static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1937 radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1938 				      struct radv_meta_blit2d_surf *src,
1939 				      struct radv_meta_blit2d_surf *dst,
1940 				      unsigned num_rects,
1941 				      struct radv_meta_blit2d_rect *rects)
1942 {
1943 	VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
1944 	struct radv_device *device = cmd_buffer->device;
1945 	struct radv_buffer_view src_view, dst_view;
1946 	unsigned src_offset = 0, dst_offset = 0;
1947 	unsigned src_stride, dst_stride;
1948 	VkBuffer src_buffer, dst_buffer;
1949 
1950 	/* 96-bit formats are only compatible to themselves. */
1951 	assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
1952 	       dst->format == VK_FORMAT_R32G32B32_SINT ||
1953 	       dst->format == VK_FORMAT_R32G32B32_SFLOAT);
1954 
1955 	/* This special itoi path for R32G32B32 formats will write the linear
1956 	 * image as a buffer with the same underlying memory. The compute
1957 	 * shader will copy all components separately using a R32 format.
1958 	 */
1959 	create_buffer_from_image(cmd_buffer, src,
1960 				 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1961 				 &src_buffer);
1962 	create_buffer_from_image(cmd_buffer, dst,
1963 				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1964 				 &dst_buffer);
1965 
1966 	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
1967 				   src_offset, src->format, &src_view);
1968 	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
1969 				   dst_offset, dst->format, &dst_view);
1970 	itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1971 
1972 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1973 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1974 
1975 	src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
1976 	dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1977 
1978 	for (unsigned r = 0; r < num_rects; ++r) {
1979 		unsigned push_constants[6] = {
1980 			rects[r].src_x,
1981 			rects[r].src_y,
1982 			src_stride,
1983 			rects[r].dst_x,
1984 			rects[r].dst_y,
1985 			dst_stride,
1986 		};
1987 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1988 				      device->meta_state.itoi_r32g32b32.img_p_layout,
1989 				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
1990 				      push_constants);
1991 
1992 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1993 	}
1994 
1995 	radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
1996 	radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
1997 }
1998 
1999 static void
itoi_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src,struct radv_image_view * dst)2000 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2001 		      struct radv_image_view *src,
2002 		      struct radv_image_view *dst)
2003 {
2004 	struct radv_device *device = cmd_buffer->device;
2005 
2006 	radv_meta_push_descriptor_set(cmd_buffer,
2007 				      VK_PIPELINE_BIND_POINT_COMPUTE,
2008 				      device->meta_state.itoi.img_p_layout,
2009 				      0, /* set */
2010 				      2, /* descriptorWriteCount */
2011 				      (VkWriteDescriptorSet[]) {
2012 				              {
2013 				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2014 				                       .dstBinding = 0,
2015 				                       .dstArrayElement = 0,
2016 				                       .descriptorCount = 1,
2017 				                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
2018 				                       .pImageInfo = (VkDescriptorImageInfo[]) {
2019 				                               {
2020 				                                       .sampler = VK_NULL_HANDLE,
2021 				                                       .imageView = radv_image_view_to_handle(src),
2022 				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2023 				                               },
2024 				                       }
2025 				              },
2026 				              {
2027 				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2028 				                       .dstBinding = 1,
2029 				                       .dstArrayElement = 0,
2030 				                       .descriptorCount = 1,
2031 				                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2032 				                       .pImageInfo = (VkDescriptorImageInfo[]) {
2033 				                               {
2034 				                                       .sampler = VK_NULL_HANDLE,
2035 				                                       .imageView = radv_image_view_to_handle(dst),
2036 				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2037 				                               },
2038 				                       }
2039 				              }
2040 				      });
2041 }
2042 
2043 void
radv_meta_image_to_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)2044 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
2045 			    struct radv_meta_blit2d_surf *src,
2046 			    struct radv_meta_blit2d_surf *dst,
2047 			    unsigned num_rects,
2048 			    struct radv_meta_blit2d_rect *rects)
2049 {
2050 	VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
2051 	struct radv_device *device = cmd_buffer->device;
2052 	struct radv_image_view src_view, dst_view;
2053 
2054 	if (src->format == VK_FORMAT_R32G32B32_UINT ||
2055 	    src->format == VK_FORMAT_R32G32B32_SINT ||
2056 	    src->format == VK_FORMAT_R32G32B32_SFLOAT) {
2057 		radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
2058 						      num_rects, rects);
2059 		return;
2060 	}
2061 
2062 	create_iview(cmd_buffer, src, &src_view);
2063 	create_iview(cmd_buffer, dst, &dst_view);
2064 
2065 	itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
2066 
2067 	if (device->physical_device->rad_info.chip_class >= GFX9 &&
2068 	    (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
2069 		pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
2070 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2071 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2072 
2073 	for (unsigned r = 0; r < num_rects; ++r) {
2074 		unsigned push_constants[6] = {
2075 			rects[r].src_x,
2076 			rects[r].src_y,
2077 			src->layer,
2078 			rects[r].dst_x,
2079 			rects[r].dst_y,
2080 			dst->layer,
2081 		};
2082 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2083 				      device->meta_state.itoi.img_p_layout,
2084 				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
2085 				      push_constants);
2086 
2087 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
2088 	}
2089 }
2090 
2091 static void
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * view)2092 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2093 				  struct radv_buffer_view *view)
2094 {
2095 	struct radv_device *device = cmd_buffer->device;
2096 
2097 	radv_meta_push_descriptor_set(cmd_buffer,
2098 				      VK_PIPELINE_BIND_POINT_COMPUTE,
2099 				      device->meta_state.cleari_r32g32b32.img_p_layout,
2100 				      0, /* set */
2101 				      1, /* descriptorWriteCount */
2102 				      (VkWriteDescriptorSet[]) {
2103 				              {
2104 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2105 				                      .dstBinding = 0,
2106 				                      .dstArrayElement = 0,
2107 				                      .descriptorCount = 1,
2108 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
2109 				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(view) },
2110 				              }
2111 				      });
2112 }
2113 
2114 static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * dst,const VkClearColorValue * clear_color)2115 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
2116 				   struct radv_meta_blit2d_surf *dst,
2117 				   const VkClearColorValue *clear_color)
2118 {
2119 	VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
2120 	struct radv_device *device = cmd_buffer->device;
2121 	struct radv_buffer_view dst_view;
2122 	unsigned stride;
2123 	VkBuffer buffer;
2124 
2125 	/* This special clear path for R32G32B32 formats will write the linear
2126 	 * image as a buffer with the same underlying memory. The compute
2127 	 * shader will clear all components separately using a R32 format.
2128 	 */
2129 	create_buffer_from_image(cmd_buffer, dst,
2130 				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
2131 				 &buffer);
2132 
2133 	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
2134 				   0, dst->format, &dst_view);
2135 	cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
2136 
2137 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2138 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2139 
2140 	stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
2141 
2142 	unsigned push_constants[4] = {
2143 		clear_color->uint32[0],
2144 		clear_color->uint32[1],
2145 		clear_color->uint32[2],
2146 		stride,
2147 	};
2148 
2149 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2150 			      device->meta_state.cleari_r32g32b32.img_p_layout,
2151 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
2152 			      push_constants);
2153 
2154 	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
2155 				dst->image->info.height, 1);
2156 
2157 	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
2158 }
2159 
2160 static void
cleari_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * dst_iview)2161 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2162 	                struct radv_image_view *dst_iview)
2163 {
2164 	struct radv_device *device = cmd_buffer->device;
2165 
2166 	radv_meta_push_descriptor_set(cmd_buffer,
2167 				      VK_PIPELINE_BIND_POINT_COMPUTE,
2168 				      device->meta_state.cleari.img_p_layout,
2169 				      0, /* set */
2170 				      1, /* descriptorWriteCount */
2171 				      (VkWriteDescriptorSet[]) {
2172 				              {
2173 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2174 				                      .dstBinding = 0,
2175 				                      .dstArrayElement = 0,
2176 				                      .descriptorCount = 1,
2177 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2178 				                      .pImageInfo = (VkDescriptorImageInfo[]) {
2179 				                               {
2180 				                                      .sampler = VK_NULL_HANDLE,
2181 				                                      .imageView = radv_image_view_to_handle(dst_iview),
2182 				                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2183 				                               },
2184 				                      }
2185 				               },
2186 				      });
2187 }
2188 
2189 void
radv_meta_clear_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * dst,const VkClearColorValue * clear_color)2190 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
2191 			 struct radv_meta_blit2d_surf *dst,
2192 			 const VkClearColorValue *clear_color)
2193 {
2194 	VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
2195 	struct radv_device *device = cmd_buffer->device;
2196 	struct radv_image_view dst_iview;
2197 
2198 	if (dst->format == VK_FORMAT_R32G32B32_UINT ||
2199 	    dst->format == VK_FORMAT_R32G32B32_SINT ||
2200 	    dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
2201 		radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
2202 		return;
2203 	}
2204 
2205 	create_iview(cmd_buffer, dst, &dst_iview);
2206 	cleari_bind_descriptors(cmd_buffer, &dst_iview);
2207 
2208 	if (device->physical_device->rad_info.chip_class >= GFX9 &&
2209 	    dst->image->type == VK_IMAGE_TYPE_3D)
2210 		pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
2211 
2212 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2213 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2214 
2215 	unsigned push_constants[5] = {
2216 		clear_color->uint32[0],
2217 		clear_color->uint32[1],
2218 		clear_color->uint32[2],
2219 		clear_color->uint32[3],
2220 		dst->layer,
2221 	};
2222 
2223 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2224 			      device->meta_state.cleari.img_p_layout,
2225 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
2226 			      push_constants);
2227 
2228 	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
2229 }
2230