1 #include "radv_meta.h"
2 #include "nir/nir_builder.h"
3 
4 #include "sid.h"
5 #include "radv_cs.h"
6 
7 static nir_shader *
build_buffer_fill_shader(struct radv_device * dev)8 build_buffer_fill_shader(struct radv_device *dev)
9 {
10 	nir_builder b;
11 
12 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
13 	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
14 	b.shader->info.cs.local_size[0] = 64;
15 	b.shader->info.cs.local_size[1] = 1;
16 	b.shader->info.cs.local_size[2] = 1;
17 
18 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
19 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
20 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
21 						b.shader->info.cs.local_size[0],
22 						b.shader->info.cs.local_size[1],
23 						b.shader->info.cs.local_size[2], 0);
24 
25 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
26 
27 	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
28 	offset = nir_channel(&b, offset, 0);
29 
30 	nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
31 
32 	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
33 	nir_intrinsic_set_base(load, 0);
34 	nir_intrinsic_set_range(load, 4);
35 	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
36 	load->num_components = 1;
37 	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
38 	nir_builder_instr_insert(&b, &load->instr);
39 
40 	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4);
41 
42 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
43 	store->src[0] = nir_src_for_ssa(swizzled_load);
44 	store->src[1] = nir_src_for_ssa(dst_buf);
45 	store->src[2] = nir_src_for_ssa(offset);
46 	nir_intrinsic_set_write_mask(store, 0xf);
47 	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
48 	nir_intrinsic_set_align(store, 16, 0);
49 	store->num_components = 4;
50 	nir_builder_instr_insert(&b, &store->instr);
51 
52 	return b.shader;
53 }
54 
55 static nir_shader *
build_buffer_copy_shader(struct radv_device * dev)56 build_buffer_copy_shader(struct radv_device *dev)
57 {
58 	nir_builder b;
59 
60 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
61 	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
62 	b.shader->info.cs.local_size[0] = 64;
63 	b.shader->info.cs.local_size[1] = 1;
64 	b.shader->info.cs.local_size[2] = 1;
65 
66 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
67 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
68 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
69 						b.shader->info.cs.local_size[0],
70 						b.shader->info.cs.local_size[1],
71 						b.shader->info.cs.local_size[2], 0);
72 
73 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
74 
75 	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
76 	offset = nir_channel(&b, offset, 0);
77 
78 	nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
79 	nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
80 
81 	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
82 	load->src[0] = nir_src_for_ssa(src_buf);
83 	load->src[1] = nir_src_for_ssa(offset);
84 	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
85 	load->num_components = 4;
86 	nir_intrinsic_set_align(load, 16, 0);
87 	nir_builder_instr_insert(&b, &load->instr);
88 
89 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
90 	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
91 	store->src[1] = nir_src_for_ssa(dst_buf);
92 	store->src[2] = nir_src_for_ssa(offset);
93 	nir_intrinsic_set_write_mask(store, 0xf);
94 	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
95 	nir_intrinsic_set_align(store, 16, 0);
96 	store->num_components = 4;
97 	nir_builder_instr_insert(&b, &store->instr);
98 
99 	return b.shader;
100 }
101 
102 
103 
radv_device_init_meta_buffer_state(struct radv_device * device)104 VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
105 {
106 	VkResult result;
107 	struct radv_shader_module fill_cs = { .nir = NULL };
108 	struct radv_shader_module copy_cs = { .nir = NULL };
109 
110 	fill_cs.nir = build_buffer_fill_shader(device);
111 	copy_cs.nir = build_buffer_copy_shader(device);
112 
113 	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
114 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
115 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
116 		.bindingCount = 1,
117 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
118 			{
119 				.binding = 0,
120 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
121 				.descriptorCount = 1,
122 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
123 				.pImmutableSamplers = NULL
124 			},
125 		}
126 	};
127 
128 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
129 						&fill_ds_create_info,
130 						&device->meta_state.alloc,
131 						&device->meta_state.buffer.fill_ds_layout);
132 	if (result != VK_SUCCESS)
133 		goto fail;
134 
135 	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
136 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
137 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
138 		.bindingCount = 2,
139 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
140 			{
141 				.binding = 0,
142 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
143 				.descriptorCount = 1,
144 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
145 				.pImmutableSamplers = NULL
146 			},
147 			{
148 				.binding = 1,
149 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
150 				.descriptorCount = 1,
151 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
152 				.pImmutableSamplers = NULL
153 			},
154 		}
155 	};
156 
157 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
158 						&copy_ds_create_info,
159 						&device->meta_state.alloc,
160 						&device->meta_state.buffer.copy_ds_layout);
161 	if (result != VK_SUCCESS)
162 		goto fail;
163 
164 
165 	VkPipelineLayoutCreateInfo fill_pl_create_info = {
166 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
167 		.setLayoutCount = 1,
168 		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
169 		.pushConstantRangeCount = 1,
170 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
171 	};
172 
173 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
174 					  &fill_pl_create_info,
175 					  &device->meta_state.alloc,
176 					  &device->meta_state.buffer.fill_p_layout);
177 	if (result != VK_SUCCESS)
178 		goto fail;
179 
180 	VkPipelineLayoutCreateInfo copy_pl_create_info = {
181 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
182 		.setLayoutCount = 1,
183 		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
184 		.pushConstantRangeCount = 0,
185 	};
186 
187 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
188 					  &copy_pl_create_info,
189 					  &device->meta_state.alloc,
190 					  &device->meta_state.buffer.copy_p_layout);
191 	if (result != VK_SUCCESS)
192 		goto fail;
193 
194 	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
195 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
196 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
197 		.module = radv_shader_module_to_handle(&fill_cs),
198 		.pName = "main",
199 		.pSpecializationInfo = NULL,
200 	};
201 
202 	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
203 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
204 		.stage = fill_pipeline_shader_stage,
205 		.flags = 0,
206 		.layout = device->meta_state.buffer.fill_p_layout,
207 	};
208 
209 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
210 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
211 					     1, &fill_vk_pipeline_info, NULL,
212 					     &device->meta_state.buffer.fill_pipeline);
213 	if (result != VK_SUCCESS)
214 		goto fail;
215 
216 	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
217 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
218 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
219 		.module = radv_shader_module_to_handle(&copy_cs),
220 		.pName = "main",
221 		.pSpecializationInfo = NULL,
222 	};
223 
224 	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
225 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
226 		.stage = copy_pipeline_shader_stage,
227 		.flags = 0,
228 		.layout = device->meta_state.buffer.copy_p_layout,
229 	};
230 
231 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
232 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
233 					     1, &copy_vk_pipeline_info, NULL,
234 					     &device->meta_state.buffer.copy_pipeline);
235 	if (result != VK_SUCCESS)
236 		goto fail;
237 
238 	ralloc_free(fill_cs.nir);
239 	ralloc_free(copy_cs.nir);
240 	return VK_SUCCESS;
241 fail:
242 	radv_device_finish_meta_buffer_state(device);
243 	ralloc_free(fill_cs.nir);
244 	ralloc_free(copy_cs.nir);
245 	return result;
246 }
247 
radv_device_finish_meta_buffer_state(struct radv_device * device)248 void radv_device_finish_meta_buffer_state(struct radv_device *device)
249 {
250 	struct radv_meta_state *state = &device->meta_state;
251 
252 	radv_DestroyPipeline(radv_device_to_handle(device),
253 			     state->buffer.copy_pipeline, &state->alloc);
254 	radv_DestroyPipeline(radv_device_to_handle(device),
255 			     state->buffer.fill_pipeline, &state->alloc);
256 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
257 				   state->buffer.copy_p_layout, &state->alloc);
258 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
259 				   state->buffer.fill_p_layout, &state->alloc);
260 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
261 					state->buffer.copy_ds_layout,
262 					&state->alloc);
263 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
264 					state->buffer.fill_ds_layout,
265 					&state->alloc);
266 }
267 
fill_buffer_shader(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t value)268 static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
269 			       struct radeon_winsys_bo *bo,
270 			       uint64_t offset, uint64_t size, uint32_t value)
271 {
272 	struct radv_device *device = cmd_buffer->device;
273 	uint64_t block_count = round_up_u64(size, 1024);
274 	struct radv_meta_saved_state saved_state;
275 
276 	radv_meta_save(&saved_state, cmd_buffer,
277 		       RADV_META_SAVE_COMPUTE_PIPELINE |
278 		       RADV_META_SAVE_CONSTANTS |
279 		       RADV_META_SAVE_DESCRIPTORS);
280 
281 	struct radv_buffer dst_buffer = {
282 		.bo = bo,
283 		.offset = offset,
284 		.size = size
285 	};
286 
287 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
288 			     VK_PIPELINE_BIND_POINT_COMPUTE,
289 			     device->meta_state.buffer.fill_pipeline);
290 
291 	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
292 			              device->meta_state.buffer.fill_p_layout,
293 				      0, /* set */
294 				      1, /* descriptorWriteCount */
295 				      (VkWriteDescriptorSet[]) {
296 				              {
297 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
298 				                      .dstBinding = 0,
299 				                      .dstArrayElement = 0,
300 				                      .descriptorCount = 1,
301 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
302 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
303 				                              .buffer = radv_buffer_to_handle(&dst_buffer),
304 				                              .offset = 0,
305 				                              .range = size
306 				                      }
307 				              }
308 				      });
309 
310 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
311 			      device->meta_state.buffer.fill_p_layout,
312 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
313 			      &value);
314 
315 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
316 
317 	radv_meta_restore(&saved_state, cmd_buffer);
318 }
319 
copy_buffer_shader(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * src_bo,struct radeon_winsys_bo * dst_bo,uint64_t src_offset,uint64_t dst_offset,uint64_t size)320 static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
321 			       struct radeon_winsys_bo *src_bo,
322 			       struct radeon_winsys_bo *dst_bo,
323 			       uint64_t src_offset, uint64_t dst_offset,
324 			       uint64_t size)
325 {
326 	struct radv_device *device = cmd_buffer->device;
327 	uint64_t block_count = round_up_u64(size, 1024);
328 	struct radv_meta_saved_state saved_state;
329 
330 	radv_meta_save(&saved_state, cmd_buffer,
331 		       RADV_META_SAVE_COMPUTE_PIPELINE |
332 		       RADV_META_SAVE_DESCRIPTORS);
333 
334 	struct radv_buffer dst_buffer = {
335 		.bo = dst_bo,
336 		.offset = dst_offset,
337 		.size = size
338 	};
339 
340 	struct radv_buffer src_buffer = {
341 		.bo = src_bo,
342 		.offset = src_offset,
343 		.size = size
344 	};
345 
346 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
347 			     VK_PIPELINE_BIND_POINT_COMPUTE,
348 			     device->meta_state.buffer.copy_pipeline);
349 
350 	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
351 			              device->meta_state.buffer.copy_p_layout,
352 				      0, /* set */
353 				      2, /* descriptorWriteCount */
354 				      (VkWriteDescriptorSet[]) {
355 				              {
356 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
357 				                      .dstBinding = 0,
358 				                      .dstArrayElement = 0,
359 				                      .descriptorCount = 1,
360 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
361 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
362 				                              .buffer = radv_buffer_to_handle(&dst_buffer),
363 				                              .offset = 0,
364 				                              .range = size
365 				                      }
366 				              },
367 				              {
368 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
369 				                      .dstBinding = 1,
370 				                      .dstArrayElement = 0,
371 				                      .descriptorCount = 1,
372 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
373 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
374 				                              .buffer = radv_buffer_to_handle(&src_buffer),
375 				                              .offset = 0,
376 				                              .range = size
377 				                      }
378 				              }
379 				      });
380 
381 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
382 
383 	radv_meta_restore(&saved_state, cmd_buffer);
384 }
385 
386 
radv_fill_buffer(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t value)387 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
388 		      struct radeon_winsys_bo *bo,
389 		      uint64_t offset, uint64_t size, uint32_t value)
390 {
391 	uint32_t flush_bits = 0;
392 
393 	assert(!(offset & 3));
394 	assert(!(size & 3));
395 
396 	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
397 		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
398 		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
399 			     RADV_CMD_FLAG_INV_VCACHE |
400 			     RADV_CMD_FLAG_WB_L2;
401 	} else if (size) {
402 		uint64_t va = radv_buffer_get_va(bo);
403 		va += offset;
404 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
405 		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
406 	}
407 
408 	return flush_bits;
409 }
410 
411 static
radv_copy_buffer(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * src_bo,struct radeon_winsys_bo * dst_bo,uint64_t src_offset,uint64_t dst_offset,uint64_t size)412 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
413 		      struct radeon_winsys_bo *src_bo,
414 		      struct radeon_winsys_bo *dst_bo,
415 		      uint64_t src_offset, uint64_t dst_offset,
416 		      uint64_t size)
417 {
418 	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
419 		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
420 				   src_offset, dst_offset, size);
421 	else if (size) {
422 		uint64_t src_va = radv_buffer_get_va(src_bo);
423 		uint64_t dst_va = radv_buffer_get_va(dst_bo);
424 		src_va += src_offset;
425 		dst_va += dst_offset;
426 
427 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
428 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
429 
430 		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
431 	}
432 }
433 
/*
 * vkCmdFillBuffer entry point.
 *
 * A fillSize of VK_WHOLE_SIZE is replaced by the number of bytes from
 * dstOffset to the end of the buffer, rounded down to a multiple of 4.
 * The fill itself is done by radv_fill_buffer(); its return value is not
 * used here.
 */
void radv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	VkDeviceSize size = fillSize;

	if (size == VK_WHOLE_SIZE) {
		VkDeviceSize remaining = dst_buffer->size - dstOffset;

		size = remaining & ~3ull;
	}

	radv_fill_buffer(cmd_buffer, dst_buffer->bo,
			 dst_buffer->offset + dstOffset, size, data);
}
450 
451 static void
copy_buffer(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * src_buffer,struct radv_buffer * dst_buffer,const VkBufferCopy2KHR * region)452 copy_buffer(struct radv_cmd_buffer *cmd_buffer,
453 	    struct radv_buffer *src_buffer,
454 	    struct radv_buffer *dst_buffer,
455 	    const VkBufferCopy2KHR *region)
456 {
457 	bool old_predicating;
458 
459 	/* VK_EXT_conditional_rendering says that copy commands should not be
460 	 * affected by conditional rendering.
461 	 */
462 	old_predicating = cmd_buffer->state.predicating;
463 	cmd_buffer->state.predicating = false;
464 
465 	radv_copy_buffer(cmd_buffer,
466 			 src_buffer->bo,
467 			 dst_buffer->bo,
468 			 src_buffer->offset + region->srcOffset,
469 			 dst_buffer->offset + region->dstOffset,
470 			 region->size);
471 
472 	/* Restore conditional rendering. */
473 	cmd_buffer->state.predicating = old_predicating;
474 }
475 
/*
 * vkCmdCopyBuffer entry point: converts each VkBufferCopy region into a
 * VkBufferCopy2KHR and hands it to copy_buffer(), in array order.
 */
void radv_CmdCopyBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    srcBuffer,
	VkBuffer                                    destBuffer,
	uint32_t                                    regionCount,
	const VkBufferCopy*                         pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, destBuffer);

	for (unsigned i = 0; i < regionCount; i++) {
		const VkBufferCopy *region = &pRegions[i];
		VkBufferCopy2KHR region2 = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
			.srcOffset = region->srcOffset,
			.dstOffset = region->dstOffset,
			.size = region->size,
		};

		copy_buffer(cmd_buffer, src_buffer, dst_buffer, &region2);
	}
}
498 
/*
 * vkCmdCopyBuffer2KHR entry point: passes every region of pCopyBufferInfo
 * to copy_buffer(), in array order.
 */
void radv_CmdCopyBuffer2KHR(
	VkCommandBuffer                             commandBuffer,
	const VkCopyBufferInfo2KHR*                 pCopyBufferInfo)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

	unsigned i = 0;

	while (i < pCopyBufferInfo->regionCount) {
		const VkBufferCopy2KHR *region = &pCopyBufferInfo->pRegions[i];

		copy_buffer(cmd_buffer, src_buffer, dst_buffer, region);
		i++;
	}
}
512 
/*
 * vkCmdUpdateBuffer entry point.
 *
 * dataSize and the destination GPU address must be multiples of 4
 * (asserted).  A zero dataSize does nothing.  Updates smaller than
 * RADV_BUFFER_UPDATE_THRESHOLD are emitted inline as a PKT3_WRITE_DATA
 * packet after a cache flush; larger updates are first uploaded through
 * radv_cmd_buffer_upload_data() and then copied from the upload BO with
 * radv_copy_buffer().
 */
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	const uint64_t dst_bo_offset = dstOffset + dst_buffer->offset;
	const uint64_t va = radv_buffer_get_va(dst_buffer->bo) + dst_bo_offset;
	const uint64_t num_dwords = dataSize / 4;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize >= RADV_BUFFER_UPDATE_THRESHOLD) {
		/* Large update: stage the data, then do a buffer copy.
		 *
		 * NOTE(review): the result of radv_cmd_buffer_upload_data()
		 * is not checked; if it can fail without writing buf_offset,
		 * the copy below would read an unset value - confirm.
		 */
		uint32_t buf_offset;

		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dst_bo_offset, dataSize);
		return;
	}

	/* Small update: write the payload straight from the command stream. */
	si_emit_cache_flush(cmd_buffer);

	radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);

	/* Four dwords of packet header/control/address plus the payload. */
	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, num_dwords + 4);

	radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
	radeon_emit(cmd_buffer->cs,
		    S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) |
		    S_370_WR_CONFIRM(1) |
		    S_370_ENGINE_SEL(V_370_ME));
	radeon_emit(cmd_buffer->cs, va);
	radeon_emit(cmd_buffer->cs, va >> 32);
	radeon_emit_array(cmd_buffer->cs, pData, num_dwords);

	if (unlikely(cmd_buffer->device->trace_bo))
		radv_cmd_buffer_trace_emit(cmd_buffer);
}
558