/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Christian König <christian.koenig@amd.com>
 */
26 
27 #include "util/u_memory.h"
28 #include "util/u_framebuffer.h"
29 #include "util/u_blitter.h"
30 #include "tgsi/tgsi_parse.h"
31 #include "radeonsi_pipe.h"
32 #include "radeonsi_shader.h"
33 #include "si_state.h"
34 #include "sid.h"
35 
/*
 * Shaders
 */
39 
si_pipe_shader_vs(struct pipe_context * ctx,struct si_pipe_shader * shader)40 static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
41 {
42 	struct r600_context *rctx = (struct r600_context *)ctx;
43 	struct si_pm4_state *pm4;
44 	unsigned num_sgprs, num_user_sgprs;
45 	unsigned nparams, i;
46 	uint64_t va;
47 
48 	if (si_pipe_shader_create(ctx, shader))
49 		return;
50 
51 	si_pm4_delete_state(rctx, vs, shader->pm4);
52 	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
53 
54 	si_pm4_inval_shader_cache(pm4);
55 
56 	/* Certain attributes (position, psize, etc.) don't count as params.
57 	 * VS is required to export at least one param and r600_shader_from_tgsi()
58 	 * takes care of adding a dummy export.
59 	 */
60 	for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
61 		if (shader->shader.output[i].name != TGSI_SEMANTIC_POSITION)
62 			nparams++;
63 	}
64 	if (nparams < 1)
65 		nparams = 1;
66 
67 	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
68 		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
69 
70 	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
71 		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
72 		       S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
73 		       S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
74 		       S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE));
75 
76 	va = r600_resource_va(ctx->screen, (void *)shader->bo);
77 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
78 	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
79 	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
80 
81 	num_user_sgprs = 8;
82 	num_sgprs = shader->num_sgprs;
83 	if (num_user_sgprs > num_sgprs)
84 		num_sgprs = num_user_sgprs;
85 	/* Last 2 reserved SGPRs are used for VCC */
86 	num_sgprs += 2;
87 	assert(num_sgprs <= 104);
88 
89 	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
90 		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
91 		       S_00B128_SGPRS((num_sgprs - 1) / 8));
92 	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
93 		       S_00B12C_USER_SGPR(num_user_sgprs));
94 
95 	si_pm4_bind_state(rctx, vs, shader->pm4);
96 }
97 
/* Compile a pixel shader variant and build the PM4 state that programs it.
 *
 * Scans the shader's inputs to derive the barycentric/interpolation
 * controls (SPI_BARYC_CNTL, SPI_PS_IN_CONTROL), scans the outputs to
 * derive DB_SHADER_CONTROL and the export count, uploads the shader BO
 * address and SGPR/VGPR limits, then binds the state on the context.
 */
static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct si_pm4_state *pm4;
	unsigned i, exports_ps, num_cout, spi_ps_in_control, db_shader_control;
	unsigned num_sgprs, num_user_sgprs;
	int ninterp = 0;
	boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
	unsigned spi_baryc_cntl, spi_ps_input_ena;
	uint64_t va;

	if (si_pipe_shader_create(ctx, shader))
		return;

	/* Drop any previous PM4 state for this shader and start fresh.
	 * NOTE(review): pm4 is not NULL-checked after CALLOC_STRUCT;
	 * the calls below would crash on OOM. */
	si_pm4_delete_state(rctx, ps, shader->pm4);
	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	si_pm4_inval_shader_cache(pm4);

	/* Determine which barycentric sets (perspective/linear,
	 * center/centroid) the interpolators need. */
	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
	for (i = 0; i < shader->shader.ninput; i++) {
		ninterp++;
		/* XXX: Flat shading hangs the GPU, so constant/flat color
		 * inputs are treated as linear-interpolated as a workaround. */
		if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
		    (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
		     rctx->queued.named.rasterizer->flatshade))
			have_linear = TRUE;
		if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
			have_linear = TRUE;
		if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
			have_perspective = TRUE;
		if (shader->shader.input[i].centroid)
			have_centroid = TRUE;
	}

	/* Enable depth export if the shader writes gl_FragDepth. */
	for (i = 0; i < shader->shader.noutput; i++) {
		if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION)
			db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
		if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
			db_shader_control |= 0; // XXX OP_VAL or TEST_VAL?
	}
	if (shader->shader.uses_kill)
		db_shader_control |= S_02880C_KILL_ENABLE(1);

	/* Count exports: bit 0 = depth/stencil, color buffers otherwise. */
	exports_ps = 0;
	num_cout = 0;
	for (i = 0; i < shader->shader.noutput; i++) {
		if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION ||
		    shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
			exports_ps |= 1;
		else if (shader->shader.output[i].name == TGSI_SEMANTIC_COLOR) {
			if (shader->shader.fs_write_all)
				num_cout = shader->shader.nr_cbufs;
			else
				num_cout++;
		}
	}
	if (!exports_ps) {
		/* always at least export 1 component per pixel */
		exports_ps = 2;
	}

	spi_ps_in_control = S_0286D8_NUM_INTERP(ninterp);

	spi_baryc_cntl = 0;
	if (have_perspective)
		spi_baryc_cntl |= have_centroid ?
			S_0286E0_PERSP_CENTROID_CNTL(1) : S_0286E0_PERSP_CENTER_CNTL(1);
	if (have_linear)
		spi_baryc_cntl |= have_centroid ?
			S_0286E0_LINEAR_CENTROID_CNTL(1) : S_0286E0_LINEAR_CENTER_CNTL(1);

	si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
	spi_ps_input_ena = shader->spi_ps_input_ena;
	/* we need to enable at least one of them, otherwise we hang the GPU */
	if (!G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) &&
	    !G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) &&
	    !G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) &&
	    !G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) &&
	    !G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) &&
	    !G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) &&
	    !G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena)) {

		spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
	}
	si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
	si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);

	/* XXX: Depends on Z buffer format? */
	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0);

	/* Program the shader code address (256-byte aligned, split lo/hi). */
	va = r600_resource_va(ctx->screen, (void *)shader->bo);
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
	si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);

	num_user_sgprs = 6;
	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs)
		num_sgprs = num_user_sgprs;
	/* Last 2 reserved SGPRs are used for VCC */
	num_sgprs += 2;
	assert(num_sgprs <= 104);

	/* RSRC1/RSRC2 hold the register allocation granules
	 * (VGPRs in units of 4, SGPRs in units of 8). */
	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
		       S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B028_SGPRS((num_sgprs - 1) / 8));
	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
		       S_00B02C_USER_SGPR(num_user_sgprs));

	si_pm4_set_reg(pm4, R_02880C_DB_SHADER_CONTROL, db_shader_control);

	shader->sprite_coord_enable = rctx->sprite_coord_enable;
	si_pm4_bind_state(rctx, ps, shader->pm4);
}
214 
/*
 * Drawing
 */
218 
si_conv_pipe_prim(unsigned pprim)219 static unsigned si_conv_pipe_prim(unsigned pprim)
220 {
221         static const unsigned prim_conv[] = {
222 		[PIPE_PRIM_POINTS]			= V_008958_DI_PT_POINTLIST,
223 		[PIPE_PRIM_LINES]			= V_008958_DI_PT_LINELIST,
224 		[PIPE_PRIM_LINE_LOOP]			= V_008958_DI_PT_LINELOOP,
225 		[PIPE_PRIM_LINE_STRIP]			= V_008958_DI_PT_LINESTRIP,
226 		[PIPE_PRIM_TRIANGLES]			= V_008958_DI_PT_TRILIST,
227 		[PIPE_PRIM_TRIANGLE_STRIP]		= V_008958_DI_PT_TRISTRIP,
228 		[PIPE_PRIM_TRIANGLE_FAN]		= V_008958_DI_PT_TRIFAN,
229 		[PIPE_PRIM_QUADS]			= V_008958_DI_PT_QUADLIST,
230 		[PIPE_PRIM_QUAD_STRIP]			= V_008958_DI_PT_QUADSTRIP,
231 		[PIPE_PRIM_POLYGON]			= V_008958_DI_PT_POLYGON,
232 		[PIPE_PRIM_LINES_ADJACENCY]		= ~0,
233 		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= ~0,
234 		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= ~0,
235 		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= ~0
236         };
237 	unsigned result = prim_conv[pprim];
238         if (result == ~0) {
239 		R600_ERR("unsupported primitive type %d\n", pprim);
240         }
241 	return result;
242 }
243 
si_update_draw_info_state(struct r600_context * rctx,const struct pipe_draw_info * info)244 static bool si_update_draw_info_state(struct r600_context *rctx,
245 			       const struct pipe_draw_info *info)
246 {
247 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
248 	unsigned prim = si_conv_pipe_prim(info->mode);
249 	unsigned ls_mask = 0;
250 
251 	if (pm4 == NULL)
252 		return false;
253 
254 	if (prim == ~0) {
255 		FREE(pm4);
256 		return false;
257 	}
258 
259 	si_pm4_set_reg(pm4, R_008958_VGT_PRIMITIVE_TYPE, prim);
260 	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
261 	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
262 	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET,
263 		       info->indexed ? info->index_bias : info->start);
264 	si_pm4_set_reg(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info->restart_index);
265 	si_pm4_set_reg(pm4, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
266 #if 0
267 	si_pm4_set_reg(pm4, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
268 	si_pm4_set_reg(pm4, R_03CFF4_SQ_VTX_START_INST_LOC, info->start_instance);
269 #endif
270 
271         if (prim == V_008958_DI_PT_LINELIST)
272                 ls_mask = 1;
273         else if (prim == V_008958_DI_PT_LINESTRIP)
274                 ls_mask = 2;
275 	si_pm4_set_reg(pm4, R_028A0C_PA_SC_LINE_STIPPLE,
276 		       S_028A0C_AUTO_RESET_CNTL(ls_mask) |
277 		       rctx->pa_sc_line_stipple);
278 
279         if (info->mode == PIPE_PRIM_QUADS || info->mode == PIPE_PRIM_QUAD_STRIP || info->mode == PIPE_PRIM_POLYGON) {
280 		si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
281 			       S_028814_PROVOKING_VTX_LAST(1) | rctx->pa_su_sc_mode_cntl);
282         } else {
283 		si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, rctx->pa_su_sc_mode_cntl);
284         }
285 	si_pm4_set_reg(pm4, R_02881C_PA_CL_VS_OUT_CNTL,
286 		       prim == PIPE_PRIM_POINTS ? rctx->pa_cl_vs_out_cntl : 0
287 		       /*| (rctx->rasterizer->clip_plane_enable &
288 		       rctx->vs_shader->shader.clip_dist_write)*/);
289 	si_pm4_set_reg(pm4, R_028810_PA_CL_CLIP_CNTL, rctx->pa_cl_clip_cntl
290 			/*| (rctx->vs_shader->shader.clip_dist_write ||
291 			rctx->vs_shader->shader.vs_prohibit_ucps ?
292 			0 : rctx->rasterizer->clip_plane_enable & 0x3F)*/);
293 
294 	si_pm4_set_state(rctx, draw_info, pm4);
295 	return true;
296 }
297 
/* Update the SX_ALPHA_REF register from the current alpha reference
 * value.  Currently a stub: the whole body is compiled out (#if 0)
 * because it still uses the old r600_pipe_state interface and has not
 * been ported to the si_pm4 state mechanism yet.
 */
static void si_update_alpha_ref(struct r600_context *rctx)
{
#if 0
        unsigned alpha_ref;
        struct r600_pipe_state rstate;

        alpha_ref = rctx->alpha_ref;
        rstate.nregs = 0;
        if (rctx->export_16bpc)
                alpha_ref &= ~0x1FFF;
        si_pm4_set_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref);

	si_pm4_set_state(rctx, TODO, pm4);
        rctx->alpha_ref_dirty = false;
#endif
}
314 
/* Build the SPI_PS_INPUT_CNTL_* mapping that routes VS outputs to PS
 * inputs.  Each PS input is matched against the VS outputs by semantic
 * name + index; unmatched inputs load hardware defaults (offset 0x20).
 * The result is set as the "spi" PM4 state on the context.
 */
static void si_update_spi_map(struct r600_context *rctx)
{
	struct si_shader *ps = &rctx->ps_shader->current->shader;
	struct si_shader *vs = &rctx->vs_shader->current->shader;
	/* NOTE(review): pm4 is not NULL-checked after CALLOC_STRUCT;
	 * si_pm4_set_reg below would crash on OOM. */
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
	unsigned i, j, tmp;

	for (i = 0; i < ps->ninput; i++) {
		tmp = 0;

#if 0
		/* XXX: Flat shading hangs the GPU */
		if (ps->input[i].name == TGSI_SEMANTIC_POSITION ||
		    ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
		    (ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
		     rctx->rasterizer && rctx->rasterizer->flatshade)) {
			tmp |= S_028644_FLAT_SHADE(1);
		}
#endif

		/* Point sprites replace the matching generic input with
		 * the sprite texture coordinate. */
		if (ps->input[i].name == TGSI_SEMANTIC_GENERIC &&
		    rctx->sprite_coord_enable & (1 << ps->input[i].sid)) {
			tmp |= S_028644_PT_SPRITE_TEX(1);
		}

		/* Find the VS output feeding this PS input. */
		for (j = 0; j < vs->noutput; j++) {
			if (ps->input[i].name == vs->output[j].name &&
			    ps->input[i].sid == vs->output[j].sid) {
				tmp |= S_028644_OFFSET(vs->output[j].param_offset);
				break;
			}
		}

		if (j == vs->noutput) {
			/* No corresponding output found, load defaults into input */
			tmp |= S_028644_OFFSET(0x20);
		}

		si_pm4_set_reg(pm4, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp);
	}

	si_pm4_set_state(rctx, spi, pm4);
}
358 
si_update_derived_state(struct r600_context * rctx)359 static void si_update_derived_state(struct r600_context *rctx)
360 {
361 	struct pipe_context * ctx = (struct pipe_context*)rctx;
362 	unsigned ps_dirty = 0;
363 
364 	if (!rctx->blitter->running) {
365 		if (rctx->have_depth_fb || rctx->have_depth_texture)
366 			si_flush_depth_textures(rctx);
367 	}
368 
369 	si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
370 
371 	if (rctx->alpha_ref_dirty) {
372 		si_update_alpha_ref(rctx);
373 	}
374 
375 	if (!rctx->vs_shader->current->pm4) {
376 		si_pipe_shader_vs(ctx, rctx->vs_shader->current);
377 	}
378 
379 	if (!rctx->ps_shader->current->pm4) {
380 		si_pipe_shader_ps(ctx, rctx->ps_shader->current);
381 		ps_dirty = 0;
382 	}
383 	if (!rctx->ps_shader->current->bo) {
384 		if (!rctx->dummy_pixel_shader->pm4)
385 			si_pipe_shader_ps(ctx, rctx->dummy_pixel_shader);
386 		else
387 			si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
388 
389 		ps_dirty = 0;
390 	}
391 
392 	if (ps_dirty) {
393 		si_pm4_bind_state(rctx, ps, rctx->ps_shader->current->pm4);
394 		rctx->shader_dirty = true;
395 	}
396 
397 	if (rctx->shader_dirty) {
398 		si_update_spi_map(rctx);
399 		rctx->shader_dirty = false;
400 	}
401 }
402 
/* Build the vertex-buffer T# resource descriptors for the bound vertex
 * elements and upload them as shader user data for the VS.
 *
 * Each enabled element gets a 4-dword buffer descriptor (address,
 * stride, num_records, format word); each referenced buffer object is
 * added to the PM4 relocation list exactly once.
 */
static void si_vertex_buffer_update(struct r600_context *rctx)
{
	struct pipe_context *ctx = &rctx->context;
	/* NOTE(review): pm4 is not NULL-checked after CALLOC_STRUCT;
	 * the calls below would crash on OOM. */
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
	bool bound[PIPE_MAX_ATTRIBS] = {};
	unsigned i, count;
	uint64_t va;

	si_pm4_inval_vertex_cache(pm4);

	/* bind vertex buffer once */
	count = rctx->vertex_elements->count;
	assert(count <= 256 / 4);

	si_pm4_sh_data_begin(pm4);
	for (i = 0 ; i < count; i++) {
		struct pipe_vertex_element *ve = &rctx->vertex_elements->elements[i];
		struct pipe_vertex_buffer *vb;
		struct si_resource *rbuffer;
		unsigned offset;

		if (ve->vertex_buffer_index >= rctx->nr_vertex_buffers)
			continue;

		vb = &rctx->vertex_buffer[ve->vertex_buffer_index];
		rbuffer = (struct si_resource*)vb->buffer;
		if (rbuffer == NULL)
			continue;

		/* Total byte offset = buffer binding offset + element offset. */
		offset = 0;
		offset += vb->buffer_offset;
		offset += ve->src_offset;

		va = r600_resource_va(ctx->screen, (void*)rbuffer);
		va += offset;

		/* Fill in T# buffer resource description */
		si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF);
		si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
					 S_008F04_STRIDE(vb->stride)));
		/* num_records; MAX2 guards against division by a zero stride. */
		si_pm4_sh_data_add(pm4, (vb->buffer->width0 - offset) /
					 MAX2(vb->stride, 1));
		si_pm4_sh_data_add(pm4, rctx->vertex_elements->rsrc_word3[i]);

		/* Add each buffer to the relocation list only once, even if
		 * several elements reference it. */
		if (!bound[ve->vertex_buffer_index]) {
			si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ);
			bound[ve->vertex_buffer_index] = true;
		}
	}
	si_pm4_sh_data_end(pm4, R_00B148_SPI_SHADER_USER_DATA_VS_6);
	si_pm4_set_state(rctx, vertex_buffers, pm4);
}
455 
si_state_draw(struct r600_context * rctx,const struct pipe_draw_info * info,const struct pipe_index_buffer * ib)456 static void si_state_draw(struct r600_context *rctx,
457 			  const struct pipe_draw_info *info,
458 			  const struct pipe_index_buffer *ib)
459 {
460 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
461 
462 	/* queries need some special values
463 	 * (this is non-zero if any query is active) */
464 	if (rctx->num_cs_dw_queries_suspend) {
465 		struct si_state_dsa *dsa = rctx->queued.named.dsa;
466 
467 		si_pm4_set_reg(pm4, R_028004_DB_COUNT_CONTROL,
468 			       S_028004_PERFECT_ZPASS_COUNTS(1));
469 		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
470 			       dsa->db_render_override |
471 			       S_02800C_NOOP_CULL_DISABLE(1));
472 	}
473 
474 	/* draw packet */
475 	si_pm4_cmd_begin(pm4, PKT3_INDEX_TYPE);
476 	if (ib->index_size == 4) {
477 		si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_32 | (R600_BIG_ENDIAN ?
478 				V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
479 	} else {
480 		si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_16 | (R600_BIG_ENDIAN ?
481 				V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
482 	}
483 	si_pm4_cmd_end(pm4, rctx->predicate_drawing);
484 
485 	si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES);
486 	si_pm4_cmd_add(pm4, info->instance_count);
487 	si_pm4_cmd_end(pm4, rctx->predicate_drawing);
488 
489 	if (info->indexed) {
490 		uint64_t va;
491 		va = r600_resource_va(&rctx->screen->screen, ib->buffer);
492 		va += ib->offset;
493 
494 		si_pm4_add_bo(pm4, (struct si_resource *)ib->buffer, RADEON_USAGE_READ);
495 		si_pm4_cmd_begin(pm4, PKT3_DRAW_INDEX_2);
496 		si_pm4_cmd_add(pm4, (ib->buffer->width0 - ib->offset) /
497 					rctx->index_buffer.index_size);
498 		si_pm4_cmd_add(pm4, va);
499 		si_pm4_cmd_add(pm4, (va >> 32UL) & 0xFF);
500 		si_pm4_cmd_add(pm4, info->count);
501 		si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_DMA);
502 		si_pm4_cmd_end(pm4, rctx->predicate_drawing);
503 	} else {
504 		si_pm4_cmd_begin(pm4, PKT3_DRAW_INDEX_AUTO);
505 		si_pm4_cmd_add(pm4, info->count);
506 		si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
507 			       (info->count_from_stream_output ?
508 				S_0287F0_USE_OPAQUE(1) : 0));
509 		si_pm4_cmd_end(pm4, rctx->predicate_drawing);
510 	}
511 	si_pm4_set_state(rctx, draw, pm4);
512 }
513 
/* Top-level draw entry point (pipe_context::draw_vbo).
 *
 * Validates the draw, re-derives shader-dependent state, uploads the
 * vertex buffer descriptors, prepares the index buffer (translation /
 * user-buffer upload), builds the per-draw PM4 state and finally emits
 * all dirty state to the command stream.
 */
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct pipe_index_buffer ib = {};
	uint32_t cp_coher_cntl;

	/* Nothing to draw: zero count (unless drawing from stream output)
	 * or an indexed draw without a bound index buffer. */
	if ((!info->count && (info->indexed || !info->count_from_stream_output)) ||
	    (info->indexed && !rctx->index_buffer.buffer)) {
		return;
	}

	if (!rctx->ps_shader || !rctx->vs_shader)
		return;

	si_update_derived_state(rctx);
	si_vertex_buffer_update(rctx);

	if (info->indexed) {
		/* Initialize the index buffer struct. */
		pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
		ib.index_size = rctx->index_buffer.index_size;
		ib.offset = rctx->index_buffer.offset + info->start * ib.index_size;

		/* Translate or upload, if needed. */
		r600_translate_index_buffer(rctx, &ib, info->count);

		if (ib.user_buffer) {
			r600_upload_index_buffer(rctx, &ib, info->count);
		}

	} else if (info->count_from_stream_output) {
		r600_context_draw_opaque_count(rctx, (struct r600_so_target*)info->count_from_stream_output);
	}

	rctx->vs_shader_so_strides = rctx->vs_shader->current->so_strides;

	if (!si_update_draw_info_state(rctx, info))
		return;

	si_state_draw(rctx, info, &ib);

	/* Emit a surface sync if any caches need flushing before the draw. */
	cp_coher_cntl = si_pm4_sync_flags(rctx);
	if (cp_coher_cntl) {
		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
		si_cmd_surface_sync(pm4, cp_coher_cntl);
		si_pm4_set_state(rctx, sync, pm4);
	}

	/* Emit states. */
	rctx->pm4_dirty_cdwords += si_pm4_dirty_dw(rctx);

	si_need_cs_space(rctx, 0, TRUE);

	si_pm4_emit_dirty(rctx);
	rctx->pm4_dirty_cdwords = 0;

#if 0
	/* Enable stream out if needed. */
	if (rctx->streamout_start) {
		r600_context_streamout_begin(rctx);
		rctx->streamout_start = FALSE;
	}
#endif


	rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;

	/* The draw may have written depth/stencil: mark the DB dirty so it
	 * gets flushed before the buffer is sampled. */
	if (rctx->framebuffer.zsbuf)
	{
		struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
		((struct r600_resource_texture *)tex)->dirty_db = TRUE;
	}

	pipe_resource_reference(&ib.buffer, NULL);
}
589