1 #include "radv_meta.h"
2 #include "nir/nir_builder.h"
3 
4 #include "sid.h"
5 #include "radv_cs.h"
6 
/*
 * Size threshold, in bytes, at which the buffer operations in this file
 * switch from the command processor (CP) to a compute shader.
 *
 * Fills of at least this size, and dword-aligned copies of at least this
 * size, are dispatched as compute work; smaller (or unaligned) ones go
 * through CP DMA. vkCmdUpdateBuffer writes payloads below this size
 * inline with a WRITE_DATA packet and uploads + copies larger ones.
 */
#define RADV_BUFFER_OPS_CS_THRESHOLD 4096
12 
13 static nir_shader *
build_buffer_fill_shader(struct radv_device * dev)14 build_buffer_fill_shader(struct radv_device *dev)
15 {
16 	nir_builder b;
17 
18 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
19 	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
20 	b.shader->info.cs.local_size[0] = 64;
21 	b.shader->info.cs.local_size[1] = 1;
22 	b.shader->info.cs.local_size[2] = 1;
23 
24 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
25 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
26 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
27 						b.shader->info.cs.local_size[0],
28 						b.shader->info.cs.local_size[1],
29 						b.shader->info.cs.local_size[2], 0);
30 
31 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
32 
33 	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
34 	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);
35 
36 	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
37 	                                                          nir_intrinsic_vulkan_resource_index);
38 	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
39 	nir_intrinsic_set_desc_set(dst_buf, 0);
40 	nir_intrinsic_set_binding(dst_buf, 0);
41 	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
42 	nir_builder_instr_insert(&b, &dst_buf->instr);
43 
44 	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
45 	nir_intrinsic_set_base(load, 0);
46 	nir_intrinsic_set_range(load, 4);
47 	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
48 	load->num_components = 1;
49 	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
50 	nir_builder_instr_insert(&b, &load->instr);
51 
52 	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);
53 
54 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
55 	store->src[0] = nir_src_for_ssa(swizzled_load);
56 	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
57 	store->src[2] = nir_src_for_ssa(offset);
58 	nir_intrinsic_set_write_mask(store, 0xf);
59 	store->num_components = 4;
60 	nir_builder_instr_insert(&b, &store->instr);
61 
62 	return b.shader;
63 }
64 
65 static nir_shader *
build_buffer_copy_shader(struct radv_device * dev)66 build_buffer_copy_shader(struct radv_device *dev)
67 {
68 	nir_builder b;
69 
70 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
71 	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
72 	b.shader->info.cs.local_size[0] = 64;
73 	b.shader->info.cs.local_size[1] = 1;
74 	b.shader->info.cs.local_size[2] = 1;
75 
76 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
77 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
78 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
79 						b.shader->info.cs.local_size[0],
80 						b.shader->info.cs.local_size[1],
81 						b.shader->info.cs.local_size[2], 0);
82 
83 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
84 
85 	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
86 	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);
87 
88 	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
89 	                                                          nir_intrinsic_vulkan_resource_index);
90 	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
91 	nir_intrinsic_set_desc_set(dst_buf, 0);
92 	nir_intrinsic_set_binding(dst_buf, 0);
93 	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
94 	nir_builder_instr_insert(&b, &dst_buf->instr);
95 
96 	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
97 	                                                          nir_intrinsic_vulkan_resource_index);
98 	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
99 	nir_intrinsic_set_desc_set(src_buf, 0);
100 	nir_intrinsic_set_binding(src_buf, 1);
101 	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
102 	nir_builder_instr_insert(&b, &src_buf->instr);
103 
104 	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
105 	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
106 	load->src[1] = nir_src_for_ssa(offset);
107 	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
108 	load->num_components = 4;
109 	nir_builder_instr_insert(&b, &load->instr);
110 
111 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
112 	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
113 	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
114 	store->src[2] = nir_src_for_ssa(offset);
115 	nir_intrinsic_set_write_mask(store, 0xf);
116 	store->num_components = 4;
117 	nir_builder_instr_insert(&b, &store->instr);
118 
119 	return b.shader;
120 }
121 
122 
123 
radv_device_init_meta_buffer_state(struct radv_device * device)124 VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
125 {
126 	VkResult result;
127 	struct radv_shader_module fill_cs = { .nir = NULL };
128 	struct radv_shader_module copy_cs = { .nir = NULL };
129 
130 	fill_cs.nir = build_buffer_fill_shader(device);
131 	copy_cs.nir = build_buffer_copy_shader(device);
132 
133 	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
134 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
135 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
136 		.bindingCount = 1,
137 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
138 			{
139 				.binding = 0,
140 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
141 				.descriptorCount = 1,
142 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
143 				.pImmutableSamplers = NULL
144 			},
145 		}
146 	};
147 
148 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
149 						&fill_ds_create_info,
150 						&device->meta_state.alloc,
151 						&device->meta_state.buffer.fill_ds_layout);
152 	if (result != VK_SUCCESS)
153 		goto fail;
154 
155 	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
156 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
157 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
158 		.bindingCount = 2,
159 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
160 			{
161 				.binding = 0,
162 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
163 				.descriptorCount = 1,
164 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
165 				.pImmutableSamplers = NULL
166 			},
167 			{
168 				.binding = 1,
169 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
170 				.descriptorCount = 1,
171 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
172 				.pImmutableSamplers = NULL
173 			},
174 		}
175 	};
176 
177 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
178 						&copy_ds_create_info,
179 						&device->meta_state.alloc,
180 						&device->meta_state.buffer.copy_ds_layout);
181 	if (result != VK_SUCCESS)
182 		goto fail;
183 
184 
185 	VkPipelineLayoutCreateInfo fill_pl_create_info = {
186 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
187 		.setLayoutCount = 1,
188 		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
189 		.pushConstantRangeCount = 1,
190 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
191 	};
192 
193 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
194 					  &fill_pl_create_info,
195 					  &device->meta_state.alloc,
196 					  &device->meta_state.buffer.fill_p_layout);
197 	if (result != VK_SUCCESS)
198 		goto fail;
199 
200 	VkPipelineLayoutCreateInfo copy_pl_create_info = {
201 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
202 		.setLayoutCount = 1,
203 		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
204 		.pushConstantRangeCount = 0,
205 	};
206 
207 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
208 					  &copy_pl_create_info,
209 					  &device->meta_state.alloc,
210 					  &device->meta_state.buffer.copy_p_layout);
211 	if (result != VK_SUCCESS)
212 		goto fail;
213 
214 	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
215 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
216 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
217 		.module = radv_shader_module_to_handle(&fill_cs),
218 		.pName = "main",
219 		.pSpecializationInfo = NULL,
220 	};
221 
222 	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
223 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
224 		.stage = fill_pipeline_shader_stage,
225 		.flags = 0,
226 		.layout = device->meta_state.buffer.fill_p_layout,
227 	};
228 
229 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
230 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
231 					     1, &fill_vk_pipeline_info, NULL,
232 					     &device->meta_state.buffer.fill_pipeline);
233 	if (result != VK_SUCCESS)
234 		goto fail;
235 
236 	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
237 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
238 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
239 		.module = radv_shader_module_to_handle(&copy_cs),
240 		.pName = "main",
241 		.pSpecializationInfo = NULL,
242 	};
243 
244 	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
245 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
246 		.stage = copy_pipeline_shader_stage,
247 		.flags = 0,
248 		.layout = device->meta_state.buffer.copy_p_layout,
249 	};
250 
251 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
252 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
253 					     1, &copy_vk_pipeline_info, NULL,
254 					     &device->meta_state.buffer.copy_pipeline);
255 	if (result != VK_SUCCESS)
256 		goto fail;
257 
258 	ralloc_free(fill_cs.nir);
259 	ralloc_free(copy_cs.nir);
260 	return VK_SUCCESS;
261 fail:
262 	radv_device_finish_meta_buffer_state(device);
263 	ralloc_free(fill_cs.nir);
264 	ralloc_free(copy_cs.nir);
265 	return result;
266 }
267 
radv_device_finish_meta_buffer_state(struct radv_device * device)268 void radv_device_finish_meta_buffer_state(struct radv_device *device)
269 {
270 	struct radv_meta_state *state = &device->meta_state;
271 
272 	radv_DestroyPipeline(radv_device_to_handle(device),
273 			     state->buffer.copy_pipeline, &state->alloc);
274 	radv_DestroyPipeline(radv_device_to_handle(device),
275 			     state->buffer.fill_pipeline, &state->alloc);
276 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
277 				   state->buffer.copy_p_layout, &state->alloc);
278 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
279 				   state->buffer.fill_p_layout, &state->alloc);
280 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
281 					state->buffer.copy_ds_layout,
282 					&state->alloc);
283 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
284 					state->buffer.fill_ds_layout,
285 					&state->alloc);
286 }
287 
fill_buffer_shader(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t value)288 static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
289 			       struct radeon_winsys_bo *bo,
290 			       uint64_t offset, uint64_t size, uint32_t value)
291 {
292 	struct radv_device *device = cmd_buffer->device;
293 	uint64_t block_count = round_up_u64(size, 1024);
294 	struct radv_meta_saved_state saved_state;
295 
296 	radv_meta_save(&saved_state, cmd_buffer,
297 		       RADV_META_SAVE_COMPUTE_PIPELINE |
298 		       RADV_META_SAVE_CONSTANTS |
299 		       RADV_META_SAVE_DESCRIPTORS);
300 
301 	struct radv_buffer dst_buffer = {
302 		.bo = bo,
303 		.offset = offset,
304 		.size = size
305 	};
306 
307 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
308 			     VK_PIPELINE_BIND_POINT_COMPUTE,
309 			     device->meta_state.buffer.fill_pipeline);
310 
311 	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
312 			              device->meta_state.buffer.fill_p_layout,
313 				      0, /* set */
314 				      1, /* descriptorWriteCount */
315 				      (VkWriteDescriptorSet[]) {
316 				              {
317 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
318 				                      .dstBinding = 0,
319 				                      .dstArrayElement = 0,
320 				                      .descriptorCount = 1,
321 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
322 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
323 				                              .buffer = radv_buffer_to_handle(&dst_buffer),
324 				                              .offset = 0,
325 				                              .range = size
326 				                      }
327 				              }
328 				      });
329 
330 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
331 			      device->meta_state.buffer.fill_p_layout,
332 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
333 			      &value);
334 
335 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
336 
337 	radv_meta_restore(&saved_state, cmd_buffer);
338 }
339 
copy_buffer_shader(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * src_bo,struct radeon_winsys_bo * dst_bo,uint64_t src_offset,uint64_t dst_offset,uint64_t size)340 static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
341 			       struct radeon_winsys_bo *src_bo,
342 			       struct radeon_winsys_bo *dst_bo,
343 			       uint64_t src_offset, uint64_t dst_offset,
344 			       uint64_t size)
345 {
346 	struct radv_device *device = cmd_buffer->device;
347 	uint64_t block_count = round_up_u64(size, 1024);
348 	struct radv_meta_saved_state saved_state;
349 
350 	radv_meta_save(&saved_state, cmd_buffer,
351 		       RADV_META_SAVE_COMPUTE_PIPELINE |
352 		       RADV_META_SAVE_DESCRIPTORS);
353 
354 	struct radv_buffer dst_buffer = {
355 		.bo = dst_bo,
356 		.offset = dst_offset,
357 		.size = size
358 	};
359 
360 	struct radv_buffer src_buffer = {
361 		.bo = src_bo,
362 		.offset = src_offset,
363 		.size = size
364 	};
365 
366 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
367 			     VK_PIPELINE_BIND_POINT_COMPUTE,
368 			     device->meta_state.buffer.copy_pipeline);
369 
370 	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
371 			              device->meta_state.buffer.copy_p_layout,
372 				      0, /* set */
373 				      2, /* descriptorWriteCount */
374 				      (VkWriteDescriptorSet[]) {
375 				              {
376 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
377 				                      .dstBinding = 0,
378 				                      .dstArrayElement = 0,
379 				                      .descriptorCount = 1,
380 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
381 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
382 				                              .buffer = radv_buffer_to_handle(&dst_buffer),
383 				                              .offset = 0,
384 				                              .range = size
385 				                      }
386 				              },
387 				              {
388 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
389 				                      .dstBinding = 1,
390 				                      .dstArrayElement = 0,
391 				                      .descriptorCount = 1,
392 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
393 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
394 				                              .buffer = radv_buffer_to_handle(&src_buffer),
395 				                              .offset = 0,
396 				                              .range = size
397 				                      }
398 				              }
399 				      });
400 
401 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
402 
403 	radv_meta_restore(&saved_state, cmd_buffer);
404 }
405 
406 
radv_fill_buffer(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t value)407 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
408 		      struct radeon_winsys_bo *bo,
409 		      uint64_t offset, uint64_t size, uint32_t value)
410 {
411 	uint32_t flush_bits = 0;
412 
413 	assert(!(offset & 3));
414 	assert(!(size & 3));
415 
416 	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
417 		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
418 		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
419 			     RADV_CMD_FLAG_INV_VMEM_L1 |
420 			     RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
421 	} else if (size) {
422 		uint64_t va = radv_buffer_get_va(bo);
423 		va += offset;
424 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo, 8);
425 		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
426 	}
427 
428 	return flush_bits;
429 }
430 
431 static
radv_copy_buffer(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * src_bo,struct radeon_winsys_bo * dst_bo,uint64_t src_offset,uint64_t dst_offset,uint64_t size)432 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
433 		      struct radeon_winsys_bo *src_bo,
434 		      struct radeon_winsys_bo *dst_bo,
435 		      uint64_t src_offset, uint64_t dst_offset,
436 		      uint64_t size)
437 {
438 	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
439 		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
440 				   src_offset, dst_offset, size);
441 	else if (size) {
442 		uint64_t src_va = radv_buffer_get_va(src_bo);
443 		uint64_t dst_va = radv_buffer_get_va(dst_bo);
444 		src_va += src_offset;
445 		dst_va += dst_offset;
446 
447 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo, 8);
448 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo, 8);
449 
450 		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
451 	}
452 }
453 
/*
 * vkCmdFillBuffer entry point.
 *
 * Resolves the handles and forwards to radv_fill_buffer() with the
 * offset made relative to the buffer's backing BO. A fillSize of
 * VK_WHOLE_SIZE is replaced by the bytes remaining after dstOffset,
 * rounded down to a multiple of 4. The flush bits returned by
 * radv_fill_buffer() are not used here.
 */
void radv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

	if (fillSize == VK_WHOLE_SIZE) {
		VkDeviceSize remaining = dst_buffer->size - dstOffset;

		fillSize = remaining & ~3ull;
	}

	radv_fill_buffer(cmd_buffer, dst_buffer->bo,
			 dst_buffer->offset + dstOffset,
			 fillSize, data);
}
470 
/*
 * vkCmdCopyBuffer entry point.
 *
 * Issues one radv_copy_buffer() call per region, in array order, with
 * the region offsets made relative to each buffer's backing BO.
 */
void radv_CmdCopyBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    srcBuffer,
	VkBuffer                                    destBuffer,
	uint32_t                                    regionCount,
	const VkBufferCopy*                         pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);

	for (unsigned i = 0; i < regionCount; i++) {
		const VkBufferCopy *region = &pRegions[i];

		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
				 src_buffer->offset + region->srcOffset,
				 dest_buffer->offset + region->dstOffset,
				 region->size);
	}
}
491 
/*
 * vkCmdUpdateBuffer entry point.
 *
 * Writes dataSize bytes from pData into dstBuffer at dstOffset. dataSize
 * and the resulting GPU address must be multiples of 4 (asserted); a
 * zero dataSize is a no-op.
 *
 * Payloads smaller than RADV_BUFFER_OPS_CS_THRESHOLD are embedded in the
 * command stream with a WRITE_DATA packet. Larger payloads are first
 * copied into the command buffer's upload BO and then transferred with
 * radv_copy_buffer(). If that upload fails, nothing is copied.
 */
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize < RADV_BUFFER_OPS_CS_THRESHOLD) {
		si_emit_cache_flush(cmd_buffer);

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo, 8);

		/* Header dword + control dword + 64-bit address + payload. */
		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
		                                V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
		                            S_370_WR_CONFIRM(1) |
		                            S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);

		if (unlikely(cmd_buffer->device->trace_bo))
			radv_cmd_buffer_trace_emit(cmd_buffer);
	} else {
		uint32_t buf_offset;

		/*
		 * buf_offset is only written when the upload succeeds, so a
		 * failed upload must not reach the copy below: it would read
		 * an uninitialized offset into the upload BO.
		 * NOTE(review): assumes radv_cmd_buffer_upload_data() returns
		 * false on failure and records the error on the command
		 * buffer itself -- confirm against its definition.
		 */
		if (!radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32,
						 pData, &buf_offset))
			return;

		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}
537