1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "radv_debug.h"
25 #include "radv_meta.h"
26 #include "radv_private.h"
27 #include "nir/nir_builder.h"
28 
29 #include "util/format_rgb9e5.h"
30 #include "vk_format.h"
31 
/* Variant index of a depth/stencil clear pipeline.
 *
 * create_depthstencil_pipeline() translates the index into the
 * db_*_clear / db_*_disable_expclear pipeline creation flags.
 */
enum {
	/* db_*_clear is left disabled. */
	DEPTH_CLEAR_SLOW = 0,
	/* db_*_clear is enabled, expclear is not disabled. */
	DEPTH_CLEAR_FAST_EXPCLEAR = 1,
	/* db_*_clear is enabled and db_*_disable_expclear is set. */
	DEPTH_CLEAR_FAST_NO_EXPCLEAR = 2,
};
37 
38 static void
build_color_shaders(struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)39 build_color_shaders(struct nir_shader **out_vs,
40                     struct nir_shader **out_fs,
41                     uint32_t frag_output)
42 {
43 	nir_builder vs_b;
44 	nir_builder fs_b;
45 
46 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
47 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
48 
49 	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
50 	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
51 
52 	const struct glsl_type *position_type = glsl_vec4_type();
53 	const struct glsl_type *color_type = glsl_vec4_type();
54 
55 	nir_variable *vs_out_pos =
56 		nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
57 				    "gl_Position");
58 	vs_out_pos->data.location = VARYING_SLOT_POS;
59 
60 	nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
61 	nir_intrinsic_set_base(in_color_load, 0);
62 	nir_intrinsic_set_range(in_color_load, 16);
63 	in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
64 	in_color_load->num_components = 4;
65 	nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32, "clear color");
66 	nir_builder_instr_insert(&fs_b, &in_color_load->instr);
67 
68 	nir_variable *fs_out_color =
69 		nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
70 				    "f_color");
71 	fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
72 
73 	nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);
74 
75 	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
76 	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
77 
78 	const struct glsl_type *layer_type = glsl_int_type();
79 	nir_variable *vs_out_layer =
80 		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
81 				    "v_layer");
82 	vs_out_layer->data.location = VARYING_SLOT_LAYER;
83 	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
84 	nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
85 	nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
86 
87 	nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
88 	nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
89 
90 	*out_vs = vs_b.shader;
91 	*out_fs = fs_b.shader;
92 }
93 
94 static VkResult
create_pipeline(struct radv_device * device,struct radv_render_pass * render_pass,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,const struct radv_graphics_pipeline_create_info * extra,const VkAllocationCallbacks * alloc,VkPipeline * pipeline)95 create_pipeline(struct radv_device *device,
96 		struct radv_render_pass *render_pass,
97 		uint32_t samples,
98                 struct nir_shader *vs_nir,
99                 struct nir_shader *fs_nir,
100                 const VkPipelineVertexInputStateCreateInfo *vi_state,
101                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
102                 const VkPipelineColorBlendStateCreateInfo *cb_state,
103 		const VkPipelineLayout layout,
104 		const struct radv_graphics_pipeline_create_info *extra,
105                 const VkAllocationCallbacks *alloc,
106 		VkPipeline *pipeline)
107 {
108 	VkDevice device_h = radv_device_to_handle(device);
109 	VkResult result;
110 
111 	struct radv_shader_module vs_m = { .nir = vs_nir };
112 	struct radv_shader_module fs_m = { .nir = fs_nir };
113 
114 	result = radv_graphics_pipeline_create(device_h,
115 					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
116 					       &(VkGraphicsPipelineCreateInfo) {
117 						       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
118 							       .stageCount = fs_nir ? 2 : 1,
119 							       .pStages = (VkPipelineShaderStageCreateInfo[]) {
120 							       {
121 								       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
122 								       .stage = VK_SHADER_STAGE_VERTEX_BIT,
123 								       .module = radv_shader_module_to_handle(&vs_m),
124 								       .pName = "main",
125 							       },
126 							       {
127 								       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128 								       .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
129 								       .module = radv_shader_module_to_handle(&fs_m),
130 								       .pName = "main",
131 							       },
132 						       },
133 							       .pVertexInputState = vi_state,
134 									.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
135 							       .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
136 							       .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
137 							       .primitiveRestartEnable = false,
138 						       },
139 									.pViewportState = &(VkPipelineViewportStateCreateInfo) {
140 							       .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
141 							       .viewportCount = 1,
142 							       .scissorCount = 1,
143 						       },
144 										 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
145 							       .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
146 							       .rasterizerDiscardEnable = false,
147 							       .polygonMode = VK_POLYGON_MODE_FILL,
148 							       .cullMode = VK_CULL_MODE_NONE,
149 							       .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
150 							       .depthBiasEnable = false,
151 						       },
152 											  .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
153 							       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
154 							       .rasterizationSamples = samples,
155 							       .sampleShadingEnable = false,
156 							       .pSampleMask = NULL,
157 							       .alphaToCoverageEnable = false,
158 							       .alphaToOneEnable = false,
159 						       },
160 												   .pDepthStencilState = ds_state,
161 													    .pColorBlendState = cb_state,
162 													    .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
163 							       /* The meta clear pipeline declares all state as dynamic.
164 								* As a consequence, vkCmdBindPipeline writes no dynamic state
165 								* to the cmd buffer. Therefore, at the end of the meta clear,
166 								* we need only restore dynamic state was vkCmdSet.
167 								*/
168 							       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
169 							       .dynamicStateCount = 8,
170 							       .pDynamicStates = (VkDynamicState[]) {
171 								       /* Everything except stencil write mask */
172 								       VK_DYNAMIC_STATE_VIEWPORT,
173 								       VK_DYNAMIC_STATE_SCISSOR,
174 								       VK_DYNAMIC_STATE_LINE_WIDTH,
175 								       VK_DYNAMIC_STATE_DEPTH_BIAS,
176 								       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
177 								       VK_DYNAMIC_STATE_DEPTH_BOUNDS,
178 								       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
179 								       VK_DYNAMIC_STATE_STENCIL_REFERENCE,
180 							       },
181 						       },
182 						    .layout = layout,
183 						    .flags = 0,
184 						    .renderPass = radv_render_pass_to_handle(render_pass),
185 						    .subpass = 0,
186 						},
187 					       extra,
188 					       alloc,
189 					       pipeline);
190 
191 	ralloc_free(vs_nir);
192 	ralloc_free(fs_nir);
193 
194 	return result;
195 }
196 
197 static VkResult
create_color_renderpass(struct radv_device * device,VkFormat vk_format,uint32_t samples,VkRenderPass * pass)198 create_color_renderpass(struct radv_device *device,
199 			VkFormat vk_format,
200 			uint32_t samples,
201 			VkRenderPass *pass)
202 {
203 	mtx_lock(&device->meta_state.mtx);
204 	if (*pass) {
205 		mtx_unlock (&device->meta_state.mtx);
206 		return VK_SUCCESS;
207 	}
208 
209 	VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
210 				       &(VkRenderPassCreateInfo) {
211 					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
212 						       .attachmentCount = 1,
213 						       .pAttachments = &(VkAttachmentDescription) {
214 						       .format = vk_format,
215 						       .samples = samples,
216 						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
217 						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
218 						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
219 						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
220 					       },
221 						       .subpassCount = 1,
222 								.pSubpasses = &(VkSubpassDescription) {
223 						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
224 						       .inputAttachmentCount = 0,
225 						       .colorAttachmentCount = 1,
226 						       .pColorAttachments = &(VkAttachmentReference) {
227 							       .attachment = 0,
228 							       .layout = VK_IMAGE_LAYOUT_GENERAL,
229 						       },
230 						       .pResolveAttachments = NULL,
231 						       .pDepthStencilAttachment = &(VkAttachmentReference) {
232 							       .attachment = VK_ATTACHMENT_UNUSED,
233 							       .layout = VK_IMAGE_LAYOUT_GENERAL,
234 						       },
235 						       .preserveAttachmentCount = 0,
236 						       .pPreserveAttachments = NULL,
237 					       },
238 							.dependencyCount = 2,
239 							.pDependencies = (VkSubpassDependency[]) {
240 								{
241 									.srcSubpass = VK_SUBPASS_EXTERNAL,
242 									.dstSubpass = 0,
243 									.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
244 									.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
245 									.srcAccessMask = 0,
246 									.dstAccessMask = 0,
247 									.dependencyFlags = 0
248 								},
249 								{
250 									.srcSubpass = 0,
251 									.dstSubpass = VK_SUBPASS_EXTERNAL,
252 									.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
253 									.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
254 									.srcAccessMask = 0,
255 									.dstAccessMask = 0,
256 									.dependencyFlags = 0
257 								}
258 							},
259 									 }, &device->meta_state.alloc, pass);
260 	mtx_unlock(&device->meta_state.mtx);
261 	return result;
262 }
263 
264 static VkResult
create_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkPipeline * pipeline,VkRenderPass pass)265 create_color_pipeline(struct radv_device *device,
266 		      uint32_t samples,
267                       uint32_t frag_output,
268 		      VkPipeline *pipeline,
269 		      VkRenderPass pass)
270 {
271 	struct nir_shader *vs_nir;
272 	struct nir_shader *fs_nir;
273 	VkResult result;
274 
275 	mtx_lock(&device->meta_state.mtx);
276 	if (*pipeline) {
277 		mtx_unlock(&device->meta_state.mtx);
278 		return VK_SUCCESS;
279 	}
280 
281 	build_color_shaders(&vs_nir, &fs_nir, frag_output);
282 
283 	const VkPipelineVertexInputStateCreateInfo vi_state = {
284 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
285 		.vertexBindingDescriptionCount = 0,
286 		.vertexAttributeDescriptionCount = 0,
287 	};
288 
289 	const VkPipelineDepthStencilStateCreateInfo ds_state = {
290 		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
291 		.depthTestEnable = false,
292 		.depthWriteEnable = false,
293 		.depthBoundsTestEnable = false,
294 		.stencilTestEnable = false,
295 	};
296 
297 	VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 };
298 	blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) {
299 		.blendEnable = false,
300 		.colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
301 		VK_COLOR_COMPONENT_R_BIT |
302 		VK_COLOR_COMPONENT_G_BIT |
303 		VK_COLOR_COMPONENT_B_BIT,
304 	};
305 
306 	const VkPipelineColorBlendStateCreateInfo cb_state = {
307 		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
308 		.logicOpEnable = false,
309 		.attachmentCount = MAX_RTS,
310 		.pAttachments = blend_attachment_state
311 	};
312 
313 
314 	struct radv_graphics_pipeline_create_info extra = {
315 		.use_rectlist = true,
316 	};
317 	result = create_pipeline(device, radv_render_pass_from_handle(pass),
318 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
319 				 device->meta_state.clear_color_p_layout,
320 				 &extra, &device->meta_state.alloc, pipeline);
321 
322 	mtx_unlock(&device->meta_state.mtx);
323 	return result;
324 }
325 
326 static void
finish_meta_clear_htile_mask_state(struct radv_device * device)327 finish_meta_clear_htile_mask_state(struct radv_device *device)
328 {
329 	struct radv_meta_state *state = &device->meta_state;
330 
331 	radv_DestroyPipeline(radv_device_to_handle(device),
332 			     state->clear_htile_mask_pipeline,
333 			     &state->alloc);
334 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
335 				   state->clear_htile_mask_p_layout,
336 				   &state->alloc);
337 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
338 					state->clear_htile_mask_ds_layout,
339 					&state->alloc);
340 }
341 
342 void
radv_device_finish_meta_clear_state(struct radv_device * device)343 radv_device_finish_meta_clear_state(struct radv_device *device)
344 {
345 	struct radv_meta_state *state = &device->meta_state;
346 
347 	for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
348 		for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
349 			radv_DestroyPipeline(radv_device_to_handle(device),
350 					     state->clear[i].color_pipelines[j],
351 					     &state->alloc);
352 			radv_DestroyRenderPass(radv_device_to_handle(device),
353 					       state->clear[i].render_pass[j],
354 					       &state->alloc);
355 		}
356 
357 		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
358 			radv_DestroyPipeline(radv_device_to_handle(device),
359 					     state->clear[i].depth_only_pipeline[j],
360 					     &state->alloc);
361 			radv_DestroyPipeline(radv_device_to_handle(device),
362 					     state->clear[i].stencil_only_pipeline[j],
363 					     &state->alloc);
364 			radv_DestroyPipeline(radv_device_to_handle(device),
365 					     state->clear[i].depthstencil_pipeline[j],
366 					     &state->alloc);
367 
368 			radv_DestroyPipeline(radv_device_to_handle(device),
369 					     state->clear[i].depth_only_unrestricted_pipeline[j],
370 					     &state->alloc);
371 			radv_DestroyPipeline(radv_device_to_handle(device),
372 					     state->clear[i].stencil_only_unrestricted_pipeline[j],
373 					     &state->alloc);
374 			radv_DestroyPipeline(radv_device_to_handle(device),
375 					     state->clear[i].depthstencil_unrestricted_pipeline[j],
376 					     &state->alloc);
377 		}
378 		radv_DestroyRenderPass(radv_device_to_handle(device),
379 				      state->clear[i].depthstencil_rp,
380 				      &state->alloc);
381 	}
382 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
383 				   state->clear_color_p_layout,
384 				   &state->alloc);
385 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
386 				   state->clear_depth_p_layout,
387 				   &state->alloc);
388 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
389 				   state->clear_depth_unrestricted_p_layout,
390 				   &state->alloc);
391 
392 	finish_meta_clear_htile_mask_state(device);
393 }
394 
395 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)396 emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
397                  const VkClearAttachment *clear_att,
398                  const VkClearRect *clear_rect,
399                  uint32_t view_mask)
400 {
401 	struct radv_device *device = cmd_buffer->device;
402 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
403 	const uint32_t subpass_att = clear_att->colorAttachment;
404 	const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
405 	const struct radv_image_view *iview = cmd_buffer->state.attachments ?
406 		cmd_buffer->state.attachments[pass_att].iview : NULL;
407 	uint32_t samples, samples_log2;
408 	VkFormat format;
409 	unsigned fs_key;
410 	VkClearColorValue clear_value = clear_att->clearValue.color;
411 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
412 	VkPipeline pipeline;
413 
414 	/* When a framebuffer is bound to the current command buffer, get the
415 	 * number of samples from it. Otherwise, get the number of samples from
416 	 * the render pass because it's likely a secondary command buffer.
417 	 */
418 	if (iview) {
419 		samples = iview->image->info.samples;
420 		format = iview->vk_format;
421 	} else {
422 		samples = cmd_buffer->state.pass->attachments[pass_att].samples;
423 		format = cmd_buffer->state.pass->attachments[pass_att].format;
424 	}
425 
426 	samples_log2 = ffs(samples) - 1;
427 	fs_key = radv_format_meta_fs_key(format);
428 
429 	if (fs_key == -1) {
430 		radv_finishme("color clears incomplete");
431 		return;
432 	}
433 
434 	if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
435 		VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
436 		                                       samples,
437 		                                       &device->meta_state.clear[samples_log2].render_pass[fs_key]);
438 		if (ret != VK_SUCCESS) {
439 			cmd_buffer->record_result = ret;
440 			return;
441 		}
442 	}
443 
444 	if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
445 		VkResult ret = create_color_pipeline(device, samples, 0,
446 		                                     &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
447 		                                     device->meta_state.clear[samples_log2].render_pass[fs_key]);
448 		if (ret != VK_SUCCESS) {
449 			cmd_buffer->record_result = ret;
450 			return;
451 		}
452 	}
453 
454 	pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
455 	if (!pipeline) {
456 		radv_finishme("color clears incomplete");
457 		return;
458 	}
459 	assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
460 	assert(pipeline);
461 	assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
462 	assert(clear_att->colorAttachment < subpass->color_count);
463 
464 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
465 			      device->meta_state.clear_color_p_layout,
466 			      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
467 			      &clear_value);
468 
469 	struct radv_subpass clear_subpass = {
470 		.color_count = 1,
471 		.color_attachments = (struct radv_subpass_attachment[]) {
472 			subpass->color_attachments[clear_att->colorAttachment]
473 		},
474 		.depth_stencil_attachment = NULL,
475 	};
476 
477 	radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
478 
479 	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
480 			     pipeline);
481 
482 	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
483 			.x = clear_rect->rect.offset.x,
484 			.y = clear_rect->rect.offset.y,
485 			.width = clear_rect->rect.extent.width,
486 			.height = clear_rect->rect.extent.height,
487 			.minDepth = 0.0f,
488 			.maxDepth = 1.0f
489 		});
490 
491 	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
492 
493 	if (view_mask) {
494 		unsigned i;
495 		for_each_bit(i, view_mask)
496 			radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
497 	} else {
498 		radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
499 	}
500 
501 	radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
502 }
503 
504 
505 static void
build_depthstencil_shader(struct nir_shader ** out_vs,struct nir_shader ** out_fs,bool unrestricted)506 build_depthstencil_shader(struct nir_shader **out_vs,
507 			  struct nir_shader **out_fs,
508 			  bool unrestricted)
509 {
510 	nir_builder vs_b, fs_b;
511 
512 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
513 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
514 
515 	vs_b.shader->info.name = ralloc_strdup(vs_b.shader,
516 					       unrestricted ? "meta_clear_depthstencil_unrestricted_vs"
517 							    : "meta_clear_depthstencil_vs");
518 	fs_b.shader->info.name = ralloc_strdup(fs_b.shader,
519 					       unrestricted ? "meta_clear_depthstencil_unrestricted_fs"
520 							    : "meta_clear_depthstencil_fs");
521 	const struct glsl_type *position_out_type = glsl_vec4_type();
522 
523 	nir_variable *vs_out_pos =
524 		nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
525 				    "gl_Position");
526 	vs_out_pos->data.location = VARYING_SLOT_POS;
527 
528 	nir_ssa_def *z;
529 	if (unrestricted) {
530 		nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
531 		nir_intrinsic_set_base(in_color_load, 0);
532 		nir_intrinsic_set_range(in_color_load, 4);
533 		in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
534 		in_color_load->num_components = 1;
535 		nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
536 		nir_builder_instr_insert(&fs_b, &in_color_load->instr);
537 
538 		nir_variable *fs_out_depth =
539 			nir_variable_create(fs_b.shader, nir_var_shader_out,
540 					    glsl_int_type(), "f_depth");
541 		fs_out_depth->data.location = FRAG_RESULT_DEPTH;
542 		nir_store_var(&fs_b, fs_out_depth, &in_color_load->dest.ssa, 0x1);
543 
544 		z = nir_imm_float(&vs_b, 0.0);
545 	} else {
546 		nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
547 		nir_intrinsic_set_base(in_color_load, 0);
548 		nir_intrinsic_set_range(in_color_load, 4);
549 		in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
550 		in_color_load->num_components = 1;
551 		nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
552 		nir_builder_instr_insert(&vs_b, &in_color_load->instr);
553 
554 		z = &in_color_load->dest.ssa;
555 	}
556 
557 	nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);
558 	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
559 
560 	const struct glsl_type *layer_type = glsl_int_type();
561 	nir_variable *vs_out_layer =
562 		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
563 				    "v_layer");
564 	vs_out_layer->data.location = VARYING_SLOT_LAYER;
565 	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
566 	nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
567 	nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
568 
569 	nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
570 	nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
571 
572 	*out_vs = vs_b.shader;
573 	*out_fs = fs_b.shader;
574 }
575 
576 static VkResult
create_depthstencil_renderpass(struct radv_device * device,uint32_t samples,VkRenderPass * render_pass)577 create_depthstencil_renderpass(struct radv_device *device,
578 			       uint32_t samples,
579 			       VkRenderPass *render_pass)
580 {
581 	mtx_lock(&device->meta_state.mtx);
582 	if (*render_pass) {
583 		mtx_unlock(&device->meta_state.mtx);
584 		return VK_SUCCESS;
585 	}
586 
587 	VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
588 				       &(VkRenderPassCreateInfo) {
589 					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
590 						       .attachmentCount = 1,
591 						       .pAttachments = &(VkAttachmentDescription) {
592 						       .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
593 						       .samples = samples,
594 						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
595 						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
596 						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
597 						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
598 					       },
599 						       .subpassCount = 1,
600 								.pSubpasses = &(VkSubpassDescription) {
601 						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
602 						       .inputAttachmentCount = 0,
603 						       .colorAttachmentCount = 0,
604 						       .pColorAttachments = NULL,
605 						       .pResolveAttachments = NULL,
606 						       .pDepthStencilAttachment = &(VkAttachmentReference) {
607 							       .attachment = 0,
608 							       .layout = VK_IMAGE_LAYOUT_GENERAL,
609 						       },
610 						       .preserveAttachmentCount = 0,
611 						       .pPreserveAttachments = NULL,
612 					       },
613 							.dependencyCount = 2,
614 							.pDependencies = (VkSubpassDependency[]) {
615 								{
616 									.srcSubpass = VK_SUBPASS_EXTERNAL,
617 									.dstSubpass = 0,
618 									.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
619 									.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
620 									.srcAccessMask = 0,
621 									.dstAccessMask = 0,
622 									.dependencyFlags = 0
623 								},
624 								{
625 									.srcSubpass = 0,
626 									.dstSubpass = VK_SUBPASS_EXTERNAL,
627 									.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
628 									.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
629 									.srcAccessMask = 0,
630 									.dstAccessMask = 0,
631 									.dependencyFlags = 0
632 								}
633 							}
634 									 }, &device->meta_state.alloc, render_pass);
635 	mtx_unlock(&device->meta_state.mtx);
636 	return result;
637 }
638 
639 static VkResult
create_depthstencil_pipeline(struct radv_device * device,VkImageAspectFlags aspects,uint32_t samples,int index,bool unrestricted,VkPipeline * pipeline,VkRenderPass render_pass)640 create_depthstencil_pipeline(struct radv_device *device,
641                              VkImageAspectFlags aspects,
642 			     uint32_t samples,
643 			     int index,
644 			     bool unrestricted,
645 			     VkPipeline *pipeline,
646 			     VkRenderPass render_pass)
647 {
648 	struct nir_shader *vs_nir, *fs_nir;
649 	VkResult result;
650 
651 	mtx_lock(&device->meta_state.mtx);
652 	if (*pipeline) {
653 		mtx_unlock(&device->meta_state.mtx);
654 		return VK_SUCCESS;
655 	}
656 
657 	build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);
658 
659 	const VkPipelineVertexInputStateCreateInfo vi_state = {
660 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
661 		.vertexBindingDescriptionCount = 0,
662 		.vertexAttributeDescriptionCount = 0,
663 	};
664 
665 	const VkPipelineDepthStencilStateCreateInfo ds_state = {
666 		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
667 		.depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
668 		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
669 		.depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
670 		.depthBoundsTestEnable = false,
671 		.stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
672 		.front = {
673 			.passOp = VK_STENCIL_OP_REPLACE,
674 			.compareOp = VK_COMPARE_OP_ALWAYS,
675 			.writeMask = UINT32_MAX,
676 			.reference = 0, /* dynamic */
677 		},
678 		.back = { 0 /* dont care */ },
679 	};
680 
681 	const VkPipelineColorBlendStateCreateInfo cb_state = {
682 		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
683 		.logicOpEnable = false,
684 		.attachmentCount = 0,
685 		.pAttachments = NULL,
686 	};
687 
688 	struct radv_graphics_pipeline_create_info extra = {
689 		.use_rectlist = true,
690 	};
691 
692 	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
693 		extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
694 		extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
695 	}
696 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
697 		extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
698 		extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
699 	}
700 	result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
701 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
702 				 device->meta_state.clear_depth_p_layout,
703 				 &extra, &device->meta_state.alloc, pipeline);
704 
705 	mtx_unlock(&device->meta_state.mtx);
706 	return result;
707 }
708 
/* Tell whether a depth/stencil clear of iview can use a fast-clear pipeline.
 *
 * Returns false when:
 *  - there is no image view,
 *  - clear_rect does not start at (0, 0) or does not match the view extent,
 *  - the image is TC-compatible HTILE and the clear asks for a depth value
 *    other than 0.0 / 1.0 or for a non-zero stencil value.
 *
 * Otherwise returns true only if the image has HTILE, the view starts at mip
 * 0 / layer 0 and covers every array layer, the layout is HTILE-compressed
 * for this queue family, and radv_image_extent_compare() accepts the view
 * extent.
 */
static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
				      const struct radv_image_view *iview,
				      VkImageAspectFlags aspects,
				      VkImageLayout layout,
				      bool in_render_loop,
				      const VkClearRect *clear_rect,
				      VkClearDepthStencilValue clear_value)
{
	if (!iview)
		return false;

	const uint32_t queue_mask =
		radv_image_queue_family_mask(iview->image,
					     cmd_buffer->queue_family_index,
					     cmd_buffer->queue_family_index);

	/* The clear rectangle has to cover the whole view. */
	const VkRect2D *rect = &clear_rect->rect;
	const bool covers_view = rect->offset.x == 0 &&
				 rect->offset.y == 0 &&
				 rect->extent.width == iview->extent.width &&
				 rect->extent.height == iview->extent.height;
	if (!covers_view)
		return false;

	if (radv_image_is_tc_compat_htile(iview->image)) {
		const bool bad_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
				       clear_value.depth != 0.0 &&
				       clear_value.depth != 1.0;
		const bool bad_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
					 clear_value.stencil != 0;

		if (bad_depth || bad_stencil)
			return false;
	}

	return radv_image_has_htile(iview->image) &&
	       iview->base_mip == 0 &&
	       iview->base_layer == 0 &&
	       iview->layer_count == iview->image->info.array_size &&
	       radv_layout_is_htile_compressed(cmd_buffer->device, iview->image,
					       layout, in_render_loop, queue_mask) &&
	       radv_image_extent_compare(iview->image, &iview->extent);
}
741 
742 static VkPipeline
pick_depthstencil_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_state * meta_state,const struct radv_image_view * iview,int samples_log2,VkImageAspectFlags aspects,VkImageLayout layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearDepthStencilValue clear_value)743 pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
744 			   struct radv_meta_state *meta_state,
745 			   const struct radv_image_view *iview,
746 			   int samples_log2,
747 			   VkImageAspectFlags aspects,
748 			   VkImageLayout layout,
749 			   bool in_render_loop,
750 			   const VkClearRect *clear_rect,
751 			   VkClearDepthStencilValue clear_value)
752 {
753 	bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout,
754 	                                      in_render_loop, clear_rect, clear_value);
755 	bool unrestricted = cmd_buffer->device->enabled_extensions.EXT_depth_range_unrestricted;
756 	int index = DEPTH_CLEAR_SLOW;
757 	VkPipeline *pipeline;
758 
759 	if (fast) {
760 		/* we don't know the previous clear values, so we always have
761 		 * the NO_EXPCLEAR path */
762 		index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
763 	}
764 
765 	switch (aspects) {
766 	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
767 		pipeline = unrestricted ?
768 			   &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index] :
769 			   &meta_state->clear[samples_log2].depthstencil_pipeline[index];
770 		break;
771 	case VK_IMAGE_ASPECT_DEPTH_BIT:
772 		pipeline = unrestricted ?
773 			   &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index] :
774 			   &meta_state->clear[samples_log2].depth_only_pipeline[index];
775 		break;
776 	case VK_IMAGE_ASPECT_STENCIL_BIT:
777 		pipeline = unrestricted ?
778 			   &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index] :
779 			   &meta_state->clear[samples_log2].stencil_only_pipeline[index];
780 		break;
781 	default:
782 		unreachable("expected depth or stencil aspect");
783 	}
784 
785 	if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
786 		VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
787 		                                              &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
788 		if (ret != VK_SUCCESS) {
789 			cmd_buffer->record_result = ret;
790 			return VK_NULL_HANDLE;
791 		}
792 	}
793 
794 	if (*pipeline == VK_NULL_HANDLE) {
795 		VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted,
796 		                                            pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
797 		if (ret != VK_SUCCESS) {
798 			cmd_buffer->record_result = ret;
799 			return VK_NULL_HANDLE;
800 		}
801 	}
802 	return *pipeline;
803 }
804 
805 static void
emit_depthstencil_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,struct radv_subpass_attachment * ds_att,uint32_t view_mask)806 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
807                         const VkClearAttachment *clear_att,
808                         const VkClearRect *clear_rect,
809 			struct radv_subpass_attachment *ds_att,
810                         uint32_t view_mask)
811 {
812 	struct radv_device *device = cmd_buffer->device;
813 	struct radv_meta_state *meta_state = &device->meta_state;
814 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
815 	const uint32_t pass_att = ds_att->attachment;
816 	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
817 	VkImageAspectFlags aspects = clear_att->aspectMask;
818 	const struct radv_image_view *iview = cmd_buffer->state.attachments ?
819 		cmd_buffer->state.attachments[pass_att].iview : NULL;
820 	uint32_t samples, samples_log2;
821 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
822 
823 	/* When a framebuffer is bound to the current command buffer, get the
824 	 * number of samples from it. Otherwise, get the number of samples from
825 	 * the render pass because it's likely a secondary command buffer.
826 	 */
827 	if (iview) {
828 		samples = iview->image->info.samples;
829 	} else {
830 		samples = cmd_buffer->state.pass->attachments[pass_att].samples;
831 	}
832 
833 	samples_log2 = ffs(samples) - 1;
834 
835 	assert(pass_att != VK_ATTACHMENT_UNUSED);
836 
837 	if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
838 		clear_value.depth = 1.0f;
839 
840 	if (cmd_buffer->device->enabled_extensions.EXT_depth_range_unrestricted) {
841 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
842 				      device->meta_state.clear_depth_unrestricted_p_layout,
843 				      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
844 				      &clear_value.depth);
845 	} else {
846 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
847 				      device->meta_state.clear_depth_p_layout,
848 				      VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
849 				      &clear_value.depth);
850 	}
851 
852 	uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
853 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
854 		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
855 						  clear_value.stencil);
856 	}
857 
858 	VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer,
859 							 meta_state,
860 							 iview,
861 							 samples_log2,
862 							 aspects,
863 							 ds_att->layout,
864 							 ds_att->in_render_loop,
865 							 clear_rect,
866 							 clear_value);
867 	if (!pipeline)
868 		return;
869 
870 	struct radv_subpass clear_subpass = {
871 		.color_count = 0,
872 		.color_attachments = NULL,
873 		.depth_stencil_attachment = ds_att,
874 	};
875 
876 	radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
877 
878 	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
879 			     pipeline);
880 
881 	if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
882 	                              ds_att->layout, ds_att->in_render_loop,
883 	                              clear_rect, clear_value))
884 		radv_update_ds_clear_metadata(cmd_buffer, iview,
885 					      clear_value, aspects);
886 
887 	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
888 			.x = clear_rect->rect.offset.x,
889 			.y = clear_rect->rect.offset.y,
890 			.width = clear_rect->rect.extent.width,
891 			.height = clear_rect->rect.extent.height,
892 			.minDepth = 0.0f,
893 			.maxDepth = 1.0f
894 		});
895 
896 	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
897 
898 	if (view_mask) {
899 		unsigned i;
900 		for_each_bit(i, view_mask)
901 			radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
902 	} else {
903 		radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
904 	}
905 
906 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
907 		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
908 						  prev_reference);
909 	}
910 
911 	radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
912 }
913 
914 static uint32_t
clear_htile_mask(struct radv_cmd_buffer * cmd_buffer,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t htile_value,uint32_t htile_mask)915 clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
916 		 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
917 		 uint32_t htile_value, uint32_t htile_mask)
918 {
919 	struct radv_device *device = cmd_buffer->device;
920 	struct radv_meta_state *state = &device->meta_state;
921 	uint64_t block_count = round_up_u64(size, 1024);
922 	struct radv_meta_saved_state saved_state;
923 
924 	radv_meta_save(&saved_state, cmd_buffer,
925 		       RADV_META_SAVE_COMPUTE_PIPELINE |
926 		       RADV_META_SAVE_CONSTANTS |
927 		       RADV_META_SAVE_DESCRIPTORS);
928 
929 	struct radv_buffer dst_buffer = {
930 		.bo = bo,
931 		.offset = offset,
932 		.size = size
933 	};
934 
935 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
936 			     VK_PIPELINE_BIND_POINT_COMPUTE,
937 			     state->clear_htile_mask_pipeline);
938 
939 	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
940 			              state->clear_htile_mask_p_layout,
941 				      0, /* set */
942 				      1, /* descriptorWriteCount */
943 				      (VkWriteDescriptorSet[]) {
944 				              {
945 				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
946 				                      .dstBinding = 0,
947 				                      .dstArrayElement = 0,
948 				                      .descriptorCount = 1,
949 				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
950 				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
951 				                              .buffer = radv_buffer_to_handle(&dst_buffer),
952 				                              .offset = 0,
953 				                              .range = size
954 				                      }
955 				              }
956 				      });
957 
958 	const unsigned constants[2] = {
959 		htile_value & htile_mask,
960 		~htile_mask,
961 	};
962 
963 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
964 			      state->clear_htile_mask_p_layout,
965 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
966 			      constants);
967 
968 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
969 
970 	radv_meta_restore(&saved_state, cmd_buffer);
971 
972 	return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
973 	       RADV_CMD_FLAG_INV_VCACHE |
974 	       RADV_CMD_FLAG_WB_L2;
975 }
976 
977 static uint32_t
radv_get_htile_fast_clear_value(const struct radv_image * image,VkClearDepthStencilValue value)978 radv_get_htile_fast_clear_value(const struct radv_image *image,
979 				VkClearDepthStencilValue value)
980 {
981 	uint32_t clear_value;
982 
983 	if (!image->planes[0].surface.has_stencil) {
984 		clear_value = value.depth ? 0xfffffff0 : 0;
985 	} else {
986 		clear_value = value.depth ? 0xfffc0000 : 0;
987 	}
988 
989 	return clear_value;
990 }
991 
992 static uint32_t
radv_get_htile_mask(const struct radv_image * image,VkImageAspectFlags aspects)993 radv_get_htile_mask(const struct radv_image *image, VkImageAspectFlags aspects)
994 {
995 	uint32_t mask = 0;
996 
997 	if (!image->planes[0].surface.has_stencil) {
998 		/* All the HTILE buffer is used when there is no stencil. */
999 		mask = UINT32_MAX;
1000 	} else {
1001 		if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
1002 			mask |= 0xfffffc0f;
1003 		if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
1004 			mask |= 0x000003f0;
1005 	}
1006 
1007 	return mask;
1008 }
1009 
1010 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)1011 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
1012 {
1013 	return value.depth == 1.0f || value.depth == 0.0f;
1014 }
1015 
1016 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)1017 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
1018 {
1019 	return value.stencil == 0;
1020 }
1021 
1022 /**
1023  * Determine if the given image can be fast cleared.
1024  */
1025 static bool
radv_image_can_fast_clear(struct radv_device * device,struct radv_image * image)1026 radv_image_can_fast_clear(struct radv_device *device,  struct radv_image *image)
1027 {
1028 	if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
1029 		return false;
1030 
1031 	if (vk_format_is_color(image->vk_format)) {
1032 		if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
1033 			return false;
1034 
1035 		/* RB+ doesn't work with CMASK fast clear on Stoney. */
1036 		if (!radv_image_has_dcc(image) &&
1037 		    device->physical_device->rad_info.family == CHIP_STONEY)
1038 			return false;
1039 	} else {
1040 		if (!radv_image_has_htile(image))
1041 			return false;
1042 	}
1043 
1044 	/* Do not fast clears 3D images. */
1045 	if (image->type == VK_IMAGE_TYPE_3D)
1046 		return false;
1047 
1048 	return true;
1049 }
1050 
1051 /**
1052  * Determine if the given image view can be fast cleared.
1053  */
1054 static bool
radv_image_view_can_fast_clear(struct radv_device * device,const struct radv_image_view * iview)1055 radv_image_view_can_fast_clear(struct radv_device *device,
1056 			       const struct radv_image_view *iview)
1057 {
1058 	struct radv_image *image;
1059 
1060 	if (!iview)
1061 		return false;
1062 	image = iview->image;
1063 
1064 	/* Only fast clear if the image itself can be fast cleared. */
1065 	if (!radv_image_can_fast_clear(device, image))
1066 		return false;
1067 
1068 	/* Only fast clear if all layers are bound. */
1069 	if (iview->base_layer > 0 ||
1070 	    iview->layer_count != image->info.array_size)
1071 		return false;
1072 
1073 	/* Only fast clear if the view covers the whole image. */
1074 	if (!radv_image_extent_compare(image, &iview->extent))
1075 		return false;
1076 
1077 	return true;
1078 }
1079 
1080 static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,bool in_render_loop,VkImageAspectFlags aspects,const VkClearRect * clear_rect,const VkClearDepthStencilValue clear_value,uint32_t view_mask)1081 radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
1082 			  const struct radv_image_view *iview,
1083 			  VkImageLayout image_layout,
1084 			  bool in_render_loop,
1085 			  VkImageAspectFlags aspects,
1086 			  const VkClearRect *clear_rect,
1087 			  const VkClearDepthStencilValue clear_value,
1088 			  uint32_t view_mask)
1089 {
1090 	if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
1091 		return false;
1092 
1093 	if (!radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, image_layout, in_render_loop,
1094 	                                     radv_image_queue_family_mask(iview->image,
1095 	                                                                  cmd_buffer->queue_family_index,
1096 	                                                                  cmd_buffer->queue_family_index)))
1097 		return false;
1098 
1099 	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1100 	    clear_rect->rect.extent.width != iview->image->info.width ||
1101 	    clear_rect->rect.extent.height != iview->image->info.height)
1102 		return false;
1103 
1104 	if (view_mask && (iview->image->info.array_size >= 32 ||
1105 	                 (1u << iview->image->info.array_size) - 1u != view_mask))
1106 		return false;
1107 	if (!view_mask && clear_rect->baseArrayLayer != 0)
1108 		return false;
1109 	if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
1110 		return false;
1111 
1112 	if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1113 	    !radv_is_fast_clear_depth_allowed(clear_value)) ||
1114 	    ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1115 	     !radv_is_fast_clear_stencil_allowed(clear_value)))
1116 		return false;
1117 
1118 	return true;
1119 }
1120 
1121 static void
radv_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1122 radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
1123 		      const struct radv_image_view *iview,
1124 		      const VkClearAttachment *clear_att,
1125 		      enum radv_cmd_flush_bits *pre_flush,
1126 		      enum radv_cmd_flush_bits *post_flush)
1127 {
1128 	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1129 	VkImageAspectFlags aspects = clear_att->aspectMask;
1130 	uint32_t clear_word, flush_bits;
1131 
1132 	clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value);
1133 
1134 	if (pre_flush) {
1135 		cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
1136 						 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush;
1137 		*pre_flush |= cmd_buffer->state.flush_bits;
1138 	}
1139 
1140 	struct VkImageSubresourceRange range = {
1141 		.aspectMask = aspects,
1142 		.baseMipLevel = 0,
1143 		.levelCount = VK_REMAINING_MIP_LEVELS,
1144 		.baseArrayLayer = 0,
1145 		.layerCount = VK_REMAINING_ARRAY_LAYERS,
1146 	};
1147 
1148 	flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
1149 
1150 	if (iview->image->planes[0].surface.has_stencil &&
1151 	    !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
1152 		/* Synchronize after performing a depth-only or a stencil-only
1153 		 * fast clear because the driver uses an optimized path which
1154 		 * performs a read-modify-write operation, and the two separate
1155 		 * aspects might use the same HTILE memory.
1156 		 */
1157 		cmd_buffer->state.flush_bits |= flush_bits;
1158 	}
1159 
1160 	radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
1161 	if (post_flush) {
1162 		*post_flush |= flush_bits;
1163 	}
1164 }
1165 
1166 static nir_shader *
build_clear_htile_mask_shader()1167 build_clear_htile_mask_shader()
1168 {
1169 	nir_builder b;
1170 
1171 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1172 	b.shader->info.name = ralloc_strdup(b.shader, "meta_clear_htile_mask");
1173 	b.shader->info.cs.local_size[0] = 64;
1174 	b.shader->info.cs.local_size[1] = 1;
1175 	b.shader->info.cs.local_size[2] = 1;
1176 
1177 	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1178 	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
1179 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
1180 						b.shader->info.cs.local_size[0],
1181 						b.shader->info.cs.local_size[1],
1182 						b.shader->info.cs.local_size[2], 0);
1183 
1184 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1185 
1186 	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
1187 	offset = nir_channel(&b, offset, 0);
1188 
1189 	nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
1190 
1191 	nir_intrinsic_instr *constants =
1192 		nir_intrinsic_instr_create(b.shader,
1193 					   nir_intrinsic_load_push_constant);
1194 	nir_intrinsic_set_base(constants, 0);
1195 	nir_intrinsic_set_range(constants, 8);
1196 	constants->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1197 	constants->num_components = 2;
1198 	nir_ssa_dest_init(&constants->instr, &constants->dest, 2, 32, "constants");
1199 	nir_builder_instr_insert(&b, &constants->instr);
1200 
1201 	nir_intrinsic_instr *load =
1202 		nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
1203 	load->src[0] = nir_src_for_ssa(buf);
1204 	load->src[1] = nir_src_for_ssa(offset);
1205 	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
1206 	load->num_components = 4;
1207 	nir_intrinsic_set_align(load, 16, 0);
1208 	nir_builder_instr_insert(&b, &load->instr);
1209 
1210 	/* data = (data & ~htile_mask) | (htile_value & htile_mask) */
1211 	nir_ssa_def *data =
1212 		nir_iand(&b, &load->dest.ssa,
1213 			 nir_channel(&b, &constants->dest.ssa, 1));
1214 	data = nir_ior(&b, data, nir_channel(&b, &constants->dest.ssa, 0));
1215 
1216 	nir_intrinsic_instr *store =
1217 		nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
1218 	store->src[0] = nir_src_for_ssa(data);
1219 	store->src[1] = nir_src_for_ssa(buf);
1220 	store->src[2] = nir_src_for_ssa(offset);
1221 	nir_intrinsic_set_write_mask(store, 0xf);
1222 	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
1223 	nir_intrinsic_set_align(store, 16, 0);
1224 	store->num_components = 4;
1225 	nir_builder_instr_insert(&b, &store->instr);
1226 
1227 	return b.shader;
1228 }
1229 
1230 static VkResult
init_meta_clear_htile_mask_state(struct radv_device * device)1231 init_meta_clear_htile_mask_state(struct radv_device *device)
1232 {
1233 	struct radv_meta_state *state = &device->meta_state;
1234 	struct radv_shader_module cs = { .nir = NULL };
1235 	VkResult result;
1236 
1237 	cs.nir = build_clear_htile_mask_shader();
1238 
1239 	VkDescriptorSetLayoutCreateInfo ds_layout_info = {
1240 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1241 		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1242 		.bindingCount = 1,
1243 		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1244 			{
1245 				.binding = 0,
1246 				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1247 				.descriptorCount = 1,
1248 				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1249 				.pImmutableSamplers = NULL
1250 			},
1251 		}
1252 	};
1253 
1254 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1255 						&ds_layout_info, &state->alloc,
1256 						&state->clear_htile_mask_ds_layout);
1257 	if (result != VK_SUCCESS)
1258 		goto fail;
1259 
1260 	VkPipelineLayoutCreateInfo p_layout_info = {
1261 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1262 		.setLayoutCount = 1,
1263 		.pSetLayouts = &state->clear_htile_mask_ds_layout,
1264 		.pushConstantRangeCount = 1,
1265 		.pPushConstantRanges = &(VkPushConstantRange){
1266 			VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
1267 		},
1268 	};
1269 
1270 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1271 					  &p_layout_info, &state->alloc,
1272 					  &state->clear_htile_mask_p_layout);
1273 	if (result != VK_SUCCESS)
1274 		goto fail;
1275 
1276 	VkPipelineShaderStageCreateInfo shader_stage = {
1277 		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1278 		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1279 		.module = radv_shader_module_to_handle(&cs),
1280 		.pName = "main",
1281 		.pSpecializationInfo = NULL,
1282 	};
1283 
1284 	VkComputePipelineCreateInfo pipeline_info = {
1285 		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1286 		.stage = shader_stage,
1287 		.flags = 0,
1288 		.layout = state->clear_htile_mask_p_layout,
1289 	};
1290 
1291 	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1292 					     radv_pipeline_cache_to_handle(&state->cache),
1293 					     1, &pipeline_info, NULL,
1294 					     &state->clear_htile_mask_pipeline);
1295 
1296 	ralloc_free(cs.nir);
1297 	return result;
1298 fail:
1299 	ralloc_free(cs.nir);
1300 	return result;
1301 }
1302 
1303 VkResult
radv_device_init_meta_clear_state(struct radv_device * device,bool on_demand)1304 radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1305 {
1306 	VkResult res;
1307 	struct radv_meta_state *state = &device->meta_state;
1308 
1309 	VkPipelineLayoutCreateInfo pl_color_create_info = {
1310 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1311 		.setLayoutCount = 0,
1312 		.pushConstantRangeCount = 1,
1313 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
1314 	};
1315 
1316 	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1317 					&pl_color_create_info,
1318 					&device->meta_state.alloc,
1319 					&device->meta_state.clear_color_p_layout);
1320 	if (res != VK_SUCCESS)
1321 		goto fail;
1322 
1323 	VkPipelineLayoutCreateInfo pl_depth_create_info = {
1324 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1325 		.setLayoutCount = 0,
1326 		.pushConstantRangeCount = 1,
1327 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
1328 	};
1329 
1330 	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1331 					&pl_depth_create_info,
1332 					&device->meta_state.alloc,
1333 					&device->meta_state.clear_depth_p_layout);
1334 	if (res != VK_SUCCESS)
1335 		goto fail;
1336 
1337 	VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
1338 		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1339 		.setLayoutCount = 0,
1340 		.pushConstantRangeCount = 1,
1341 		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
1342 	};
1343 
1344 	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1345 					&pl_depth_unrestricted_create_info,
1346 					&device->meta_state.alloc,
1347 					&device->meta_state.clear_depth_unrestricted_p_layout);
1348 	if (res != VK_SUCCESS)
1349 		goto fail;
1350 
1351 	res = init_meta_clear_htile_mask_state(device);
1352 	if (res != VK_SUCCESS)
1353 		goto fail;
1354 
1355 	if (on_demand)
1356 		return VK_SUCCESS;
1357 
1358 	for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
1359 		uint32_t samples = 1 << i;
1360 		for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1361 			VkFormat format = radv_fs_key_format_exemplars[j];
1362 			unsigned fs_key = radv_format_meta_fs_key(format);
1363 			assert(!state->clear[i].color_pipelines[fs_key]);
1364 
1365 			res = create_color_renderpass(device, format, samples,
1366 						      &state->clear[i].render_pass[fs_key]);
1367 			if (res != VK_SUCCESS)
1368 				goto fail;
1369 
1370 			res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
1371 						    state->clear[i].render_pass[fs_key]);
1372 			if (res != VK_SUCCESS)
1373 				goto fail;
1374 
1375 		}
1376 
1377 		res = create_depthstencil_renderpass(device,
1378 						     samples,
1379 						     &state->clear[i].depthstencil_rp);
1380 		if (res != VK_SUCCESS)
1381 			goto fail;
1382 
1383 		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1384 			res = create_depthstencil_pipeline(device,
1385 							   VK_IMAGE_ASPECT_DEPTH_BIT,
1386 							   samples,
1387 							   j,
1388 							   false,
1389 							   &state->clear[i].depth_only_pipeline[j],
1390 							   state->clear[i].depthstencil_rp);
1391 			if (res != VK_SUCCESS)
1392 				goto fail;
1393 
1394 			res = create_depthstencil_pipeline(device,
1395 							   VK_IMAGE_ASPECT_STENCIL_BIT,
1396 							   samples,
1397 							   j,
1398 							   false,
1399 							   &state->clear[i].stencil_only_pipeline[j],
1400 							   state->clear[i].depthstencil_rp);
1401 			if (res != VK_SUCCESS)
1402 				goto fail;
1403 
1404 			res = create_depthstencil_pipeline(device,
1405 							   VK_IMAGE_ASPECT_DEPTH_BIT |
1406 							   VK_IMAGE_ASPECT_STENCIL_BIT,
1407 							   samples,
1408 							   j,
1409 							   false,
1410 							   &state->clear[i].depthstencil_pipeline[j],
1411 							   state->clear[i].depthstencil_rp);
1412 			if (res != VK_SUCCESS)
1413 				goto fail;
1414 
1415 			res = create_depthstencil_pipeline(device,
1416 							   VK_IMAGE_ASPECT_DEPTH_BIT,
1417 							   samples,
1418 							   j,
1419 							   true,
1420 							   &state->clear[i].depth_only_unrestricted_pipeline[j],
1421 							   state->clear[i].depthstencil_rp);
1422 			if (res != VK_SUCCESS)
1423 				goto fail;
1424 
1425 			res = create_depthstencil_pipeline(device,
1426 							   VK_IMAGE_ASPECT_STENCIL_BIT,
1427 							   samples,
1428 							   j,
1429 							   true,
1430 							   &state->clear[i].stencil_only_unrestricted_pipeline[j],
1431 							   state->clear[i].depthstencil_rp);
1432 			if (res != VK_SUCCESS)
1433 				goto fail;
1434 
1435 			res = create_depthstencil_pipeline(device,
1436 							   VK_IMAGE_ASPECT_DEPTH_BIT |
1437 							   VK_IMAGE_ASPECT_STENCIL_BIT,
1438 							   samples,
1439 							   j,
1440 							   true,
1441 							   &state->clear[i].depthstencil_unrestricted_pipeline[j],
1442 							   state->clear[i].depthstencil_rp);
1443 			if (res != VK_SUCCESS)
1444 				goto fail;
1445 		}
1446 	}
1447 	return VK_SUCCESS;
1448 
1449 fail:
1450 	radv_device_finish_meta_clear_state(device);
1451 	return res;
1452 }
1453 
1454 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)1455 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1456 {
1457 	uint32_t value = 0; /* Default value when no DCC. */
1458 
1459 	/* The fast-clear value is different for images that have both DCC and
1460 	 * CMASK metadata.
1461 	 */
1462 	if (radv_image_has_dcc(image)) {
1463 		/* DCC fast clear with MSAA should clear CMASK to 0xC. */
1464 		return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
1465 	}
1466 
1467 	return value;
1468 }
1469 
1470 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1471 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
1472 		 struct radv_image *image,
1473 		 const VkImageSubresourceRange *range, uint32_t value)
1474 {
1475 	uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
1476 	uint64_t size;
1477 
1478 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1479 		/* TODO: clear layers. */
1480 		size = image->planes[0].surface.cmask_size;
1481 	} else {
1482 		unsigned cmask_slice_size =
1483 			image->planes[0].surface.cmask_slice_size;
1484 
1485 		offset += cmask_slice_size * range->baseArrayLayer;
1486 		size = cmask_slice_size * radv_get_layerCount(image, range);
1487 	}
1488 
1489 	return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
1490 }
1491 
1492 
1493 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1494 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
1495 		 struct radv_image *image,
1496 		 const VkImageSubresourceRange *range, uint32_t value)
1497 {
1498 	uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
1499 	uint64_t size;
1500 
1501 	/* MSAA images do not support mipmap levels. */
1502 	assert(range->baseMipLevel == 0 &&
1503 	       radv_get_levelCount(image, range) == 1);
1504 
1505 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1506 		/* TODO: clear layers. */
1507 		size = image->planes[0].surface.fmask_size;
1508 	} else {
1509 		unsigned fmask_slice_size =
1510 			image->planes[0].surface.u.legacy.fmask.slice_size;
1511 
1512 
1513 		offset += fmask_slice_size * range->baseArrayLayer;
1514 		size = fmask_slice_size * radv_get_layerCount(image, range);
1515 	}
1516 
1517 	return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
1518 }
1519 
1520 uint32_t
radv_clear_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1521 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
1522 	       struct radv_image *image,
1523 	       const VkImageSubresourceRange *range, uint32_t value)
1524 {
1525 	uint32_t level_count = radv_get_levelCount(image, range);
1526 	uint32_t flush_bits = 0;
1527 
1528 	/* Mark the image as being compressed. */
1529 	radv_update_dcc_metadata(cmd_buffer, image, range, true);
1530 
1531 	for (uint32_t l = 0; l < level_count; l++) {
1532 		uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
1533 		uint32_t level = range->baseMipLevel + l;
1534 		uint64_t size;
1535 
1536 		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1537 			/* Mipmap levels aren't implemented. */
1538 			assert(level == 0);
1539 			size = image->planes[0].surface.dcc_size;
1540 		} else {
1541 			const struct legacy_surf_level *surf_level =
1542 				&image->planes[0].surface.u.legacy.level[level];
1543 
1544 			/* If dcc_fast_clear_size is 0 (which might happens for
1545 			 * mipmaps) the fill buffer operation below is a no-op.
1546 			 * This can only happen during initialization as the
1547 			 * fast clear path fallbacks to slow clears if one
1548 			 * level can't be fast cleared.
1549 			 */
1550 			offset += surf_level->dcc_offset +
1551 				  surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
1552 			size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
1553 		}
1554 
1555 		flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, offset,
1556 					       size, value);
1557 	}
1558 
1559 	return flush_bits;
1560 }
1561 
1562 uint32_t
radv_clear_htile(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1563 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
1564 		 const struct radv_image *image,
1565 		 const VkImageSubresourceRange *range,
1566 		 uint32_t value)
1567 {
1568 	unsigned layer_count = radv_get_layerCount(image, range);
1569 	uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
1570 	uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
1571 	                  image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
1572 	uint32_t htile_mask, flush_bits;
1573 
1574 	htile_mask = radv_get_htile_mask(image, range->aspectMask);
1575 
1576 	if (htile_mask == UINT_MAX) {
1577 		/* Clear the whole HTILE buffer. */
1578 		flush_bits = radv_fill_buffer(cmd_buffer, image->bo, offset,
1579 					      size, value);
1580 	} else {
1581 		/* Only clear depth or stencil bytes in the HTILE buffer. */
1582 		flush_bits = clear_htile_mask(cmd_buffer, image->bo, offset,
1583 					      size, value, htile_mask);
1584 	}
1585 
1586 	return flush_bits;
1587 }
1588 
/* DCC clear codes produced by vi_get_fast_clear_parameters(). */
enum {
	/* OR'ed in when the extra (alpha) channel clears to a non-zero value. */
	RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U,
	/* OR'ed in when the main (color) channels clear to a non-zero value. */
	RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,
	/* Default code, used whenever the clear color cannot be expressed
	 * with the two codes above.
	 */
	RADV_DCC_CLEAR_REG = 0x20202020U,
};
1594 
/**
 * Work out the DCC clear code for a fast clear with @clear_value.
 *
 * @reset_value receives the DCC clear code; @can_avoid_fast_clear_elim is
 * set to true only when every used component of the clear color is either
 * zero or "one" (1.0 for non-integer channels, at least the channel maximum
 * for pure-integer channels) and all main (non-extra) components agree.
 * In every other case the outputs keep their defaults: RADV_DCC_CLEAR_REG
 * and false.
 *
 * @image_format is not used by this function.
 */
static void vi_get_fast_clear_parameters(struct radv_device *device,
					 VkFormat image_format,
					 VkFormat view_format,
					 const VkClearColorValue *clear_value,
					 uint32_t *reset_value,
					 bool *can_avoid_fast_clear_elim)
{
	bool nonzero[4] = {0};
	bool color_bit = false;
	bool alpha_bit = false;
	bool saw_color = false;
	bool saw_alpha = false;
	int alpha_channel;

	/* Defaults that stay in place on every early return below. */
	*can_avoid_fast_clear_elim = false;
	*reset_value = RADV_DCC_CLEAR_REG;

	const struct vk_format_description *desc = vk_format_description(view_format);

	/* Pick the channel that is treated as the "extra" (alpha) one. */
	if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
	    view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
	    view_format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
		alpha_channel = -1;
	} else if (desc->layout != VK_FORMAT_LAYOUT_PLAIN) {
		return;
	} else if (vi_alpha_is_on_msb(device, view_format)) {
		alpha_channel = desc->nr_channels - 1;
	} else {
		alpha_channel = 0;
	}

	for (int c = 0; c < 4; c++) {
		/* Skip components whose swizzle does not select X..W. */
		if (desc->swizzle[c] < VK_SWIZZLE_X ||
		    desc->swizzle[c] > VK_SWIZZLE_W)
			continue;

		const int channel_index = desc->swizzle[c] - VK_SWIZZLE_X;

		if (desc->channel[c].pure_integer &&
		    desc->channel[c].type == VK_FORMAT_TYPE_SIGNED) {
			/* Non-zero values must clamp to the channel maximum. */
			const int max = u_bit_consecutive(0, desc->channel[c].size - 1);

			nonzero[c] = clear_value->int32[c] != 0;
			if (clear_value->int32[c] != 0 && clear_value->int32[c] < max)
				return;
		} else if (desc->channel[c].pure_integer &&
			   desc->channel[c].type == VK_FORMAT_TYPE_UNSIGNED) {
			/* Non-zero values must clamp to the channel maximum. */
			const unsigned max = u_bit_consecutive(0, desc->channel[c].size);

			nonzero[c] = clear_value->uint32[c] != 0U;
			if (clear_value->uint32[c] != 0U && clear_value->uint32[c] < max)
				return;
		} else {
			/* Everything else is compared as float: only 0 and 1 qualify. */
			nonzero[c] = clear_value->float32[c] != 0.0F;
			if (clear_value->float32[c] != 0.0F && clear_value->float32[c] != 1.0F)
				return;
		}

		if (channel_index == alpha_channel) {
			alpha_bit = nonzero[c];
			saw_alpha = true;
		} else {
			color_bit = nonzero[c];
			saw_color = true;
		}
	}

	/* If alpha isn't present, make it the same as color, and vice versa. */
	if (!saw_alpha)
		alpha_bit = color_bit;
	else if (!saw_color)
		color_bit = alpha_bit;

	/* All main components have to agree on zero vs. non-zero. */
	for (int c = 0; c < 4; c++) {
		if (desc->swizzle[c] < VK_SWIZZLE_X ||
		    desc->swizzle[c] > VK_SWIZZLE_W)
			continue;

		if (desc->swizzle[c] - VK_SWIZZLE_X == alpha_channel)
			continue;

		if (nonzero[c] != color_bit)
			return;
	}

	uint32_t dcc_code = 0;

	if (color_bit)
		dcc_code |= RADV_DCC_CLEAR_MAIN_1;

	if (alpha_bit)
		dcc_code |= RADV_DCC_CLEAR_SECONDARY_1;

	*can_avoid_fast_clear_elim = true;
	*reset_value = dcc_code;
}
1685 
1686 static bool
radv_can_fast_clear_color(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearColorValue clear_value,uint32_t view_mask)1687 radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
1688 			  const struct radv_image_view *iview,
1689 			  VkImageLayout image_layout,
1690 			  bool in_render_loop,
1691 			  const VkClearRect *clear_rect,
1692 			  VkClearColorValue clear_value,
1693 			  uint32_t view_mask)
1694 {
1695 	uint32_t clear_color[2];
1696 
1697 	if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
1698 		return false;
1699 
1700 	if (!radv_layout_can_fast_clear(iview->image, image_layout, in_render_loop,
1701 	                                radv_image_queue_family_mask(iview->image,
1702 	                                                             cmd_buffer->queue_family_index,
1703 	                                                             cmd_buffer->queue_family_index)))
1704 		return false;
1705 
1706 	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1707 	    clear_rect->rect.extent.width != iview->image->info.width ||
1708 	    clear_rect->rect.extent.height != iview->image->info.height)
1709 		return false;
1710 
1711 	if (view_mask && (iview->image->info.array_size >= 32 ||
1712 	                 (1u << iview->image->info.array_size) - 1u != view_mask))
1713 		return false;
1714 	if (!view_mask && clear_rect->baseArrayLayer != 0)
1715 		return false;
1716 	if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
1717 		return false;
1718 
1719 	/* DCC */
1720 	if (!radv_format_pack_clear_color(iview->vk_format,
1721 					  clear_color, &clear_value))
1722 		return false;
1723 
1724 	if (radv_dcc_enabled(iview->image, iview->base_mip)) {
1725 		bool can_avoid_fast_clear_elim;
1726 		uint32_t reset_value;
1727 
1728 		vi_get_fast_clear_parameters(cmd_buffer->device,
1729 					     iview->image->vk_format,
1730 					     iview->vk_format,
1731 					     &clear_value, &reset_value,
1732 					     &can_avoid_fast_clear_elim);
1733 
1734 		if (iview->image->info.samples > 1) {
1735 			/* DCC fast clear with MSAA should clear CMASK. */
1736 			/* FIXME: This doesn't work for now. There is a
1737 			 * hardware bug with fast clears and DCC for MSAA
1738 			 * textures. AMDVLK has a workaround but it doesn't
1739 			 * seem to work here. Note that we might emit useless
1740 			 * CB flushes but that shouldn't matter.
1741 			 */
1742 			if (!can_avoid_fast_clear_elim)
1743 				return false;
1744 		}
1745 
1746 		if (iview->image->info.levels > 1 &&
1747 		    cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
1748 			for (uint32_t l = 0; l < iview->level_count; l++) {
1749 				uint32_t level = iview->base_mip + l;
1750 				struct legacy_surf_level *surf_level =
1751 					&iview->image->planes[0].surface.u.legacy.level[level];
1752 
1753 				/* Do not fast clears if one level can't be
1754 				 * fast cleared.
1755 				 */
1756 				if (!surf_level->dcc_fast_clear_size)
1757 					return false;
1758 			}
1759 		}
1760 	}
1761 
1762 	return true;
1763 }
1764 
1765 
1766 static void
radv_fast_clear_color(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,const VkClearAttachment * clear_att,uint32_t subpass_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1767 radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
1768 		      const struct radv_image_view *iview,
1769 		      const VkClearAttachment *clear_att,
1770 		      uint32_t subpass_att,
1771 		      enum radv_cmd_flush_bits *pre_flush,
1772 		      enum radv_cmd_flush_bits *post_flush)
1773 {
1774 	VkClearColorValue clear_value = clear_att->clearValue.color;
1775 	uint32_t clear_color[2], flush_bits = 0;
1776 	uint32_t cmask_clear_value;
1777 	VkImageSubresourceRange range = {
1778 		.aspectMask = iview->aspect_mask,
1779 		.baseMipLevel = iview->base_mip,
1780 		.levelCount = iview->level_count,
1781 		.baseArrayLayer = iview->base_layer,
1782 		.layerCount = iview->layer_count,
1783 	};
1784 
1785 	if (pre_flush) {
1786 		cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
1787 						 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush;
1788 		*pre_flush |= cmd_buffer->state.flush_bits;
1789 	}
1790 
1791 	/* DCC */
1792 	radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
1793 
1794 	cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
1795 
1796 	/* clear cmask buffer */
1797 	if (radv_dcc_enabled(iview->image, iview->base_mip)) {
1798 		uint32_t reset_value;
1799 		bool can_avoid_fast_clear_elim;
1800 		bool need_decompress_pass = false;
1801 
1802 		vi_get_fast_clear_parameters(cmd_buffer->device,
1803 					     iview->image->vk_format,
1804 					     iview->vk_format,
1805 					     &clear_value, &reset_value,
1806 					     &can_avoid_fast_clear_elim);
1807 
1808 		if (radv_image_has_cmask(iview->image)) {
1809 			flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
1810 						      &range, cmask_clear_value);
1811 
1812 			need_decompress_pass = true;
1813 		}
1814 
1815 		if (!can_avoid_fast_clear_elim)
1816 			need_decompress_pass = true;
1817 
1818 		flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range,
1819 					     reset_value);
1820 
1821 		radv_update_fce_metadata(cmd_buffer, iview->image, &range,
1822 					 need_decompress_pass);
1823 	} else {
1824 		flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
1825 					      &range, cmask_clear_value);
1826 	}
1827 
1828 	if (post_flush) {
1829 		*post_flush |= flush_bits;
1830 	}
1831 
1832 	radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att,
1833 					 clear_color);
1834 }
1835 
1836 /**
1837  * The parameters mean that same as those in vkCmdClearAttachments.
1838  */
1839 static void
emit_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,uint32_t view_mask,bool ds_resolve_clear)1840 emit_clear(struct radv_cmd_buffer *cmd_buffer,
1841            const VkClearAttachment *clear_att,
1842            const VkClearRect *clear_rect,
1843            enum radv_cmd_flush_bits *pre_flush,
1844            enum radv_cmd_flush_bits *post_flush,
1845            uint32_t view_mask,
1846 	   bool ds_resolve_clear)
1847 {
1848 	const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
1849 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1850 	VkImageAspectFlags aspects = clear_att->aspectMask;
1851 
1852 	if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1853 		const uint32_t subpass_att = clear_att->colorAttachment;
1854 		assert(subpass_att < subpass->color_count);
1855 		const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
1856 		if (pass_att == VK_ATTACHMENT_UNUSED)
1857 			return;
1858 
1859 		VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
1860 		bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
1861 		const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
1862 		VkClearColorValue clear_value = clear_att->clearValue.color;
1863 
1864 		if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop,
1865 					      clear_rect, clear_value, view_mask)) {
1866 			radv_fast_clear_color(cmd_buffer, iview, clear_att,
1867 					      subpass_att, pre_flush,
1868 					      post_flush);
1869 		} else {
1870 			emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1871 		}
1872 	} else {
1873 		struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
1874 
1875 		if (ds_resolve_clear)
1876 			ds_att = subpass->ds_resolve_attachment;
1877 
1878 		if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
1879 			return;
1880 
1881 		VkImageLayout image_layout = ds_att->layout;
1882 		bool in_render_loop = ds_att->in_render_loop;
1883 		const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
1884 		VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1885 
1886 		assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1887 				  VK_IMAGE_ASPECT_STENCIL_BIT));
1888 
1889 		if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout,
1890 		                              in_render_loop, aspects, clear_rect,
1891 		                              clear_value, view_mask)) {
1892 			radv_fast_clear_depth(cmd_buffer, iview, clear_att,
1893 			                      pre_flush, post_flush);
1894 		} else {
1895 			emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect,
1896 						ds_att, view_mask);
1897 		}
1898 	}
1899 }
1900 
1901 static inline bool
radv_attachment_needs_clear(struct radv_cmd_state * cmd_state,uint32_t a)1902 radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
1903 {
1904 	uint32_t view_mask = cmd_state->subpass->view_mask;
1905 	return (a != VK_ATTACHMENT_UNUSED &&
1906 		cmd_state->attachments[a].pending_clear_aspects &&
1907 		(!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
1908 }
1909 
1910 static bool
radv_subpass_needs_clear(struct radv_cmd_buffer * cmd_buffer)1911 radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
1912 {
1913 	struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1914 	uint32_t a;
1915 
1916 	if (!cmd_state->subpass)
1917 		return false;
1918 
1919 	for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1920 		a = cmd_state->subpass->color_attachments[i].attachment;
1921 		if (radv_attachment_needs_clear(cmd_state, a))
1922 			return true;
1923 	}
1924 
1925 	if (cmd_state->subpass->depth_stencil_attachment) {
1926 		a = cmd_state->subpass->depth_stencil_attachment->attachment;
1927 		if (radv_attachment_needs_clear(cmd_state, a))
1928 			return true;
1929 	}
1930 
1931 	if (!cmd_state->subpass->ds_resolve_attachment)
1932 		return false;
1933 
1934 	a = cmd_state->subpass->ds_resolve_attachment->attachment;
1935 	return radv_attachment_needs_clear(cmd_state, a);
1936 }
1937 
1938 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,struct radv_attachment_state * attachment,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,bool ds_resolve_clear)1939 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
1940 			      struct radv_attachment_state *attachment,
1941 			      const VkClearAttachment *clear_att,
1942 			      enum radv_cmd_flush_bits *pre_flush,
1943 			      enum radv_cmd_flush_bits *post_flush,
1944 			      bool ds_resolve_clear)
1945 {
1946 	struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1947 	uint32_t view_mask = cmd_state->subpass->view_mask;
1948 
1949 	VkClearRect clear_rect = {
1950 		.rect = cmd_state->render_area,
1951 		.baseArrayLayer = 0,
1952 		.layerCount = cmd_state->framebuffer->layers,
1953 	};
1954 
1955 	radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1956 
1957 	emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
1958 		   view_mask & ~attachment->cleared_views, ds_resolve_clear);
1959 	if (view_mask)
1960 		attachment->cleared_views |= view_mask;
1961 	else
1962 		attachment->pending_clear_aspects = 0;
1963 
1964 	radv_describe_end_render_pass_clear(cmd_buffer);
1965 }
1966 
1967 /**
1968  * Emit any pending attachment clears for the current subpass.
1969  *
1970  * @see radv_attachment_state::pending_clear_aspects
1971  */
1972 void
radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer * cmd_buffer)1973 radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
1974 {
1975 	struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1976 	struct radv_meta_saved_state saved_state;
1977 	enum radv_cmd_flush_bits pre_flush = 0;
1978 	enum radv_cmd_flush_bits post_flush = 0;
1979 
1980 	if (!radv_subpass_needs_clear(cmd_buffer))
1981 		return;
1982 
1983 	radv_meta_save(&saved_state, cmd_buffer,
1984 		       RADV_META_SAVE_GRAPHICS_PIPELINE |
1985 		       RADV_META_SAVE_CONSTANTS);
1986 
1987 	for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1988 		uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
1989 
1990 		if (!radv_attachment_needs_clear(cmd_state, a))
1991 			continue;
1992 
1993 		assert(cmd_state->attachments[a].pending_clear_aspects ==
1994 		       VK_IMAGE_ASPECT_COLOR_BIT);
1995 
1996 		VkClearAttachment clear_att = {
1997 			.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1998 			.colorAttachment = i, /* Use attachment index relative to subpass */
1999 			.clearValue = cmd_state->attachments[a].clear_value,
2000 		};
2001 
2002 		radv_subpass_clear_attachment(cmd_buffer,
2003 					      &cmd_state->attachments[a],
2004 					      &clear_att, &pre_flush,
2005 					      &post_flush, false);
2006 	}
2007 
2008 	if (cmd_state->subpass->depth_stencil_attachment) {
2009 		uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
2010 		if (radv_attachment_needs_clear(cmd_state, ds)) {
2011 			VkClearAttachment clear_att = {
2012 				.aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
2013 				.clearValue = cmd_state->attachments[ds].clear_value,
2014 			};
2015 
2016 			radv_subpass_clear_attachment(cmd_buffer,
2017 						      &cmd_state->attachments[ds],
2018 						      &clear_att, &pre_flush,
2019 						      &post_flush, false);
2020 		}
2021 	}
2022 
2023 	if (cmd_state->subpass->ds_resolve_attachment) {
2024 		uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
2025 		if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
2026 			VkClearAttachment clear_att = {
2027 				.aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
2028 				.clearValue = cmd_state->attachments[ds_resolve].clear_value,
2029 			};
2030 
2031 			radv_subpass_clear_attachment(cmd_buffer,
2032 						      &cmd_state->attachments[ds_resolve],
2033 						      &clear_att, &pre_flush,
2034 						      &post_flush, true);
2035 		}
2036 	}
2037 
2038 	radv_meta_restore(&saved_state, cmd_buffer);
2039 	cmd_buffer->state.flush_bits |= post_flush;
2040 }
2041 
2042 static void
radv_clear_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkImageSubresourceRange * range,VkFormat format,int level,int layer,const VkClearValue * clear_val)2043 radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
2044 		       struct radv_image *image,
2045 		       VkImageLayout image_layout,
2046 		       const VkImageSubresourceRange *range,
2047 		       VkFormat format, int level, int layer,
2048 		       const VkClearValue *clear_val)
2049 {
2050 	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
2051 	struct radv_image_view iview;
2052 	uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
2053 	uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
2054 
2055 	radv_image_view_init(&iview, cmd_buffer->device,
2056 			     &(VkImageViewCreateInfo) {
2057 				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2058 					     .image = radv_image_to_handle(image),
2059 					     .viewType = radv_meta_get_view_type(image),
2060 					     .format = format,
2061 					     .subresourceRange = {
2062 					     .aspectMask = range->aspectMask,
2063 					     .baseMipLevel = range->baseMipLevel + level,
2064 					     .levelCount = 1,
2065 					     .baseArrayLayer = range->baseArrayLayer + layer,
2066 					     .layerCount = 1
2067 				     },
2068 			     }, NULL);
2069 
2070 	VkFramebuffer fb;
2071 	radv_CreateFramebuffer(device_h,
2072 			       &(VkFramebufferCreateInfo) {
2073 				       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2074 					       .attachmentCount = 1,
2075 					       .pAttachments = (VkImageView[]) {
2076 					       radv_image_view_to_handle(&iview),
2077 				       },
2078 					       .width = width,
2079 					       .height = height,
2080 					       .layers = 1
2081 			       },
2082 			       &cmd_buffer->pool->alloc,
2083 			       &fb);
2084 
2085 	VkAttachmentDescription att_desc = {
2086 		.format = iview.vk_format,
2087 		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
2088 		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
2089 		.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
2090 		.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
2091 		.initialLayout = image_layout,
2092 		.finalLayout = image_layout,
2093 	};
2094 
2095 	VkSubpassDescription subpass_desc = {
2096 		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
2097 		.inputAttachmentCount = 0,
2098 		.colorAttachmentCount = 0,
2099 		.pColorAttachments = NULL,
2100 		.pResolveAttachments = NULL,
2101 		.pDepthStencilAttachment = NULL,
2102 		.preserveAttachmentCount = 0,
2103 		.pPreserveAttachments = NULL,
2104 	};
2105 
2106 	const VkAttachmentReference att_ref = {
2107 		.attachment = 0,
2108 		.layout = image_layout,
2109 	};
2110 
2111 	if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
2112 		subpass_desc.colorAttachmentCount = 1;
2113 		subpass_desc.pColorAttachments = &att_ref;
2114 	} else {
2115 		subpass_desc.pDepthStencilAttachment = &att_ref;
2116 	}
2117 
2118 	VkRenderPass pass;
2119 	radv_CreateRenderPass(device_h,
2120 			      &(VkRenderPassCreateInfo) {
2121 				      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
2122 					      .attachmentCount = 1,
2123 					      .pAttachments = &att_desc,
2124 					      .subpassCount = 1,
2125 					      .pSubpasses = &subpass_desc,
2126 					      .dependencyCount = 2,
2127 					      .pDependencies = (VkSubpassDependency[]) {
2128 							{
2129 								.srcSubpass = VK_SUBPASS_EXTERNAL,
2130 								.dstSubpass = 0,
2131 								.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2132 								.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
2133 								.srcAccessMask = 0,
2134 								.dstAccessMask = 0,
2135 								.dependencyFlags = 0
2136 							},
2137 							{
2138 								.srcSubpass = 0,
2139 								.dstSubpass = VK_SUBPASS_EXTERNAL,
2140 								.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2141 								.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
2142 								.srcAccessMask = 0,
2143 								.dstAccessMask = 0,
2144 								.dependencyFlags = 0
2145 							}
2146 						}
2147 					},
2148 			      &cmd_buffer->pool->alloc,
2149 			      &pass);
2150 
2151 	radv_cmd_buffer_begin_render_pass(cmd_buffer,
2152 					  &(VkRenderPassBeginInfo) {
2153 						.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2154 						.renderArea = {
2155 						.offset = { 0, 0, },
2156 						.extent = {
2157 							.width = width,
2158 							.height = height,
2159 							},
2160 						},
2161 						.renderPass = pass,
2162 						.framebuffer = fb,
2163 						.clearValueCount = 0,
2164 						.pClearValues = NULL,
2165 					 });
2166 
2167 	radv_cmd_buffer_set_subpass(cmd_buffer,
2168 				    &cmd_buffer->state.pass->subpasses[0]);
2169 
2170 	VkClearAttachment clear_att = {
2171 		.aspectMask = range->aspectMask,
2172 		.colorAttachment = 0,
2173 		.clearValue = *clear_val,
2174 	};
2175 
2176 	VkClearRect clear_rect = {
2177 		.rect = {
2178 			.offset = { 0, 0 },
2179 			.extent = { width, height },
2180 		},
2181 		.baseArrayLayer = range->baseArrayLayer,
2182 		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
2183 	};
2184 
2185 	emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
2186 
2187 	radv_cmd_buffer_end_render_pass(cmd_buffer);
2188 	radv_DestroyRenderPass(device_h, pass,
2189 			       &cmd_buffer->pool->alloc);
2190 	radv_DestroyFramebuffer(device_h, fb,
2191 				&cmd_buffer->pool->alloc);
2192 }
2193 
2194 /**
2195  * Return TRUE if a fast color or depth clear has been performed.
2196  */
2197 static bool
radv_fast_clear_range(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkFormat format,VkImageLayout image_layout,bool in_render_loop,const VkImageSubresourceRange * range,const VkClearValue * clear_val)2198 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
2199 		      struct radv_image *image,
2200 		      VkFormat format,
2201 		      VkImageLayout image_layout,
2202 		      bool in_render_loop,
2203 		      const VkImageSubresourceRange *range,
2204 		      const VkClearValue *clear_val)
2205 {
2206 	struct radv_image_view iview;
2207 
2208 	radv_image_view_init(&iview, cmd_buffer->device,
2209 			     &(VkImageViewCreateInfo) {
2210 					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2211 					.image = radv_image_to_handle(image),
2212 					.viewType = radv_meta_get_view_type(image),
2213 					.format = image->vk_format,
2214 					.subresourceRange = {
2215 					.aspectMask = range->aspectMask,
2216 					.baseMipLevel = range->baseMipLevel,
2217 					.levelCount = range->levelCount,
2218 					.baseArrayLayer = range->baseArrayLayer,
2219 					.layerCount = range->layerCount,
2220 				   },
2221 			     }, NULL);
2222 
2223 	VkClearRect clear_rect = {
2224 		.rect = {
2225 			.offset = { 0, 0 },
2226 			.extent = {
2227 				radv_minify(image->info.width, range->baseMipLevel),
2228 				radv_minify(image->info.height, range->baseMipLevel),
2229 			},
2230 		},
2231 		.baseArrayLayer = range->baseArrayLayer,
2232 		.layerCount = range->layerCount,
2233 	};
2234 
2235 	VkClearAttachment clear_att = {
2236 		.aspectMask = range->aspectMask,
2237 		.colorAttachment = 0,
2238 		.clearValue = *clear_val,
2239 	};
2240 
2241 	if (vk_format_is_color(format)) {
2242 		if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout,
2243 					      in_render_loop, &clear_rect,
2244 					      clear_att.clearValue.color, 0)) {
2245 			radv_fast_clear_color(cmd_buffer, &iview, &clear_att,
2246 					      clear_att.colorAttachment,
2247 					      NULL, NULL);
2248 			return true;
2249 		}
2250 	} else {
2251 		if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
2252 					      in_render_loop,range->aspectMask,
2253 					      &clear_rect, clear_att.clearValue.depthStencil,
2254 					      0)) {
2255 			radv_fast_clear_depth(cmd_buffer, &iview, &clear_att,
2256 			                      NULL, NULL);
2257 			return true;
2258 		}
2259 	}
2260 
2261 	return false;
2262 }
2263 
2264 static void
radv_cmd_clear_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkClearValue * clear_value,uint32_t range_count,const VkImageSubresourceRange * ranges,bool cs)2265 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
2266 		     struct radv_image *image,
2267 		     VkImageLayout image_layout,
2268 		     const VkClearValue *clear_value,
2269 		     uint32_t range_count,
2270 		     const VkImageSubresourceRange *ranges,
2271 		     bool cs)
2272 {
2273 	VkFormat format = image->vk_format;
2274 	VkClearValue internal_clear_value = *clear_value;
2275 
2276 	if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
2277 		uint32_t value;
2278 		format = VK_FORMAT_R32_UINT;
2279 		value = float3_to_rgb9e5(clear_value->color.float32);
2280 		internal_clear_value.color.uint32[0] = value;
2281 	}
2282 
2283 	if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
2284 		uint8_t r, g;
2285 		format = VK_FORMAT_R8_UINT;
2286 		r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
2287 		g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
2288 		internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
2289 	}
2290 
2291 	if (format == VK_FORMAT_R32G32B32_UINT ||
2292 	    format == VK_FORMAT_R32G32B32_SINT ||
2293 	    format == VK_FORMAT_R32G32B32_SFLOAT)
2294 		cs = true;
2295 
2296 	for (uint32_t r = 0; r < range_count; r++) {
2297 		const VkImageSubresourceRange *range = &ranges[r];
2298 
2299 		/* Try to perform a fast clear first, otherwise fallback to
2300 		 * the legacy path.
2301 		 */
2302 		if (!cs &&
2303 		    radv_fast_clear_range(cmd_buffer, image, format,
2304 					  image_layout, false, range,
2305 					  &internal_clear_value)) {
2306 			continue;
2307 		}
2308 
2309 		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
2310 			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
2311 				radv_minify(image->info.depth, range->baseMipLevel + l) :
2312 				radv_get_layerCount(image, range);
2313 			for (uint32_t s = 0; s < layer_count; ++s) {
2314 
2315 				if (cs) {
2316 					struct radv_meta_blit2d_surf surf;
2317 					surf.format = format;
2318 					surf.image = image;
2319 					surf.level = range->baseMipLevel + l;
2320 					surf.layer = range->baseArrayLayer + s;
2321 					surf.aspect_mask = range->aspectMask;
2322 					radv_meta_clear_image_cs(cmd_buffer, &surf,
2323 								 &internal_clear_value.color);
2324 				} else {
2325 					radv_clear_image_layer(cmd_buffer, image, image_layout,
2326 							       range, format, l, s, &internal_clear_value);
2327 				}
2328 			}
2329 		}
2330 	}
2331 }
2332 
/**
 * vkCmdClearColorImage entry point.
 *
 * Command buffers on the compute queue family use the compute clear path
 * (saving compute pipeline, constants and descriptors); all others use the
 * graphics path (saving graphics pipeline and constants). The saved state
 * is restored once the clear has been recorded.
 */
void radv_CmdClearColorImage(
	VkCommandBuffer                             commandBuffer,
	VkImage                                     image_h,
	VkImageLayout                               imageLayout,
	const VkClearColorValue*                    pColor,
	uint32_t                                    rangeCount,
	const VkImageSubresourceRange*              pRanges)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_image, image, image_h);
	struct radv_meta_saved_state saved_state;
	const bool use_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
	uint32_t save_flags;

	if (use_compute)
		save_flags = RADV_META_SAVE_COMPUTE_PIPELINE |
			     RADV_META_SAVE_CONSTANTS |
			     RADV_META_SAVE_DESCRIPTORS;
	else
		save_flags = RADV_META_SAVE_GRAPHICS_PIPELINE |
			     RADV_META_SAVE_CONSTANTS;

	radv_meta_save(&saved_state, cmd_buffer, save_flags);

	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
			     (const VkClearValue *) pColor,
			     rangeCount, pRanges, use_compute);

	radv_meta_restore(&saved_state, cmd_buffer);
}
2363 
/**
 * vkCmdClearDepthStencilImage entry point.
 *
 * Always uses the graphics clear path; the graphics pipeline and constants
 * are saved before and restored after the clear is recorded.
 */
void radv_CmdClearDepthStencilImage(
	VkCommandBuffer                             commandBuffer,
	VkImage                                     image_h,
	VkImageLayout                               imageLayout,
	const VkClearDepthStencilValue*             pDepthStencil,
	uint32_t                                    rangeCount,
	const VkImageSubresourceRange*              pRanges)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_image, image, image_h);
	struct radv_meta_saved_state saved_state;
	const uint32_t save_flags = RADV_META_SAVE_GRAPHICS_PIPELINE |
				    RADV_META_SAVE_CONSTANTS;
	const VkClearValue *ds_clear_value = (const VkClearValue *) pDepthStencil;

	radv_meta_save(&saved_state, cmd_buffer, save_flags);

	radv_cmd_clear_image(cmd_buffer, image, imageLayout, ds_clear_value,
			     rangeCount, pRanges, false);

	radv_meta_restore(&saved_state, cmd_buffer);
}
2386 
/**
 * vkCmdClearAttachments entry point.
 *
 * Does nothing when no subpass is active. Otherwise every attachment is
 * cleared once per rectangle with the current subpass' view mask, with the
 * graphics pipeline and constants saved around the clears. Flush bits
 * requested for after the clears are added to the command buffer's pending
 * flushes.
 */
void radv_CmdClearAttachments(
	VkCommandBuffer                             commandBuffer,
	uint32_t                                    attachmentCount,
	const VkClearAttachment*                    pAttachments,
	uint32_t                                    rectCount,
	const VkClearRect*                          pRects)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	struct radv_meta_saved_state saved_state;
	enum radv_cmd_flush_bits pre_flush = 0;
	enum radv_cmd_flush_bits post_flush = 0;

	if (!cmd_buffer->state.subpass)
		return;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_GRAPHICS_PIPELINE |
		       RADV_META_SAVE_CONSTANTS);

	/* FINISHME: We can do better than this dumb loop. It thrashes too much
	 * state.
	 */
	for (uint32_t att_idx = 0; att_idx < attachmentCount; ++att_idx) {
		const VkClearAttachment *att = &pAttachments[att_idx];

		for (uint32_t rect_idx = 0; rect_idx < rectCount; ++rect_idx) {
			const VkClearRect *rect = &pRects[rect_idx];

			emit_clear(cmd_buffer, att, rect, &pre_flush, &post_flush,
			           cmd_buffer->state.subpass->view_mask, false);
		}
	}

	radv_meta_restore(&saved_state, cmd_buffer);
	cmd_buffer->state.flush_bits |= post_flush;
}
2419