1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "r600_cs.h"
25 #include "util/u_viewport.h"
26 #include "tgsi/tgsi_scan.h"
27 
/*
 * Register offsets and bit-field helpers used by this file.
 *
 * Naming: R_<offset>_<name> is a register offset; S_<offset>_<field>(x)
 * masks x and shifts it into the field's position; G_<offset>_<field>(x)
 * extracts the field from a register value; C_<offset>_<field> is the mask
 * that clears the field.
 */

/* Start of the guard-band register run; R600-family and Cayman ("CM")
 * chips use different offsets. */
#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ	0x028C0C
#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ	0x028BE8

/* First viewport transform register (X scale of viewport 0). */
#define R_02843C_PA_CL_VPORT_XSCALE		0x02843C

/* Per-viewport scissor, top-left corner. */
#define R_028250_PA_SC_VPORT_SCISSOR_0_TL	0x028250
#define   S_028250_TL_X(x)			(((unsigned)(x) & 0x7FFF) << 0)
#define   G_028250_TL_X(x)			(((x) >> 0) & 0x7FFF)
#define   C_028250_TL_X				0xFFFF8000
#define   S_028250_TL_Y(x)			(((unsigned)(x) & 0x7FFF) << 16)
#define   G_028250_TL_Y(x)			(((x) >> 16) & 0x7FFF)
#define   C_028250_TL_Y				0x8000FFFF
#define   S_028250_WINDOW_OFFSET_DISABLE(x)	(((unsigned)(x) & 0x1) << 31)
#define   G_028250_WINDOW_OFFSET_DISABLE(x)	(((x) >> 31) & 0x1)
#define   C_028250_WINDOW_OFFSET_DISABLE	0x7FFFFFFF

/* Per-viewport scissor, bottom-right corner (the register that follows TL). */
#define   S_028254_BR_X(x)			(((unsigned)(x) & 0x7FFF) << 0)
#define   G_028254_BR_X(x)			(((x) >> 0) & 0x7FFF)
#define   C_028254_BR_X				0xFFFF8000
#define   S_028254_BR_Y(x)			(((unsigned)(x) & 0x7FFF) << 16)
#define   G_028254_BR_Y(x)			(((x) >> 16) & 0x7FFF)
#define   C_028254_BR_Y				0x8000FFFF

/* Depth range (zmin, zmax) of viewport 0. */
#define R_0282D0_PA_SC_VPORT_ZMIN_0		0x0282D0
#define R_0282D4_PA_SC_VPORT_ZMAX_0		0x0282D4
50 
/* Upper bound used for scissor coordinates: 16384 when the context's
 * chip_class is EVERGREEN or later, 8192 otherwise. The argument is
 * parenthesised so that any pointer expression (e.g. &ctx, p + 1) works. */
#define GET_MAX_SCISSOR(rctx) ((rctx)->chip_class >= EVERGREEN ? 16384 : 8192)
52 
r600_set_scissor_states(struct pipe_context * ctx,unsigned start_slot,unsigned num_scissors,const struct pipe_scissor_state * state)53 static void r600_set_scissor_states(struct pipe_context *ctx,
54 				    unsigned start_slot,
55 				    unsigned num_scissors,
56 				    const struct pipe_scissor_state *state)
57 {
58 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
59 	int i;
60 
61 	for (i = 0; i < num_scissors; i++)
62 		rctx->scissors.states[start_slot + i] = state[i];
63 
64 	if (!rctx->scissor_enabled)
65 		return;
66 
67 	rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
68 	rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
69 }
70 
71 /* Since the guard band disables clipping, we have to clip per-pixel
72  * using a scissor.
73  */
r600_get_scissor_from_viewport(struct r600_common_context * rctx,const struct pipe_viewport_state * vp,struct r600_signed_scissor * scissor)74 static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
75 					   const struct pipe_viewport_state *vp,
76 					   struct r600_signed_scissor *scissor)
77 {
78 	float tmp, minx, miny, maxx, maxy;
79 
80 	/* Convert (-1, -1) and (1, 1) from clip space into window space. */
81 	minx = -vp->scale[0] + vp->translate[0];
82 	miny = -vp->scale[1] + vp->translate[1];
83 	maxx = vp->scale[0] + vp->translate[0];
84 	maxy = vp->scale[1] + vp->translate[1];
85 
86 	/* r600_draw_rectangle sets this. Disable the scissor. */
87 	if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
88 		scissor->minx = scissor->miny = 0;
89 		scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
90 		return;
91 	}
92 
93 	/* Handle inverted viewports. */
94 	if (minx > maxx) {
95 		tmp = minx;
96 		minx = maxx;
97 		maxx = tmp;
98 	}
99 	if (miny > maxy) {
100 		tmp = miny;
101 		miny = maxy;
102 		maxy = tmp;
103 	}
104 
105 	/* Convert to integer and round up the max bounds. */
106 	scissor->minx = minx;
107 	scissor->miny = miny;
108 	scissor->maxx = ceilf(maxx);
109 	scissor->maxy = ceilf(maxy);
110 }
111 
r600_clamp_scissor(struct r600_common_context * rctx,struct pipe_scissor_state * out,struct r600_signed_scissor * scissor)112 static void r600_clamp_scissor(struct r600_common_context *rctx,
113 			       struct pipe_scissor_state *out,
114 			       struct r600_signed_scissor *scissor)
115 {
116 	unsigned max_scissor = GET_MAX_SCISSOR(rctx);
117 	out->minx = CLAMP(scissor->minx, 0, max_scissor);
118 	out->miny = CLAMP(scissor->miny, 0, max_scissor);
119 	out->maxx = CLAMP(scissor->maxx, 0, max_scissor);
120 	out->maxy = CLAMP(scissor->maxy, 0, max_scissor);
121 }
122 
r600_clip_scissor(struct pipe_scissor_state * out,struct pipe_scissor_state * clip)123 static void r600_clip_scissor(struct pipe_scissor_state *out,
124 			      struct pipe_scissor_state *clip)
125 {
126 	out->minx = MAX2(out->minx, clip->minx);
127 	out->miny = MAX2(out->miny, clip->miny);
128 	out->maxx = MIN2(out->maxx, clip->maxx);
129 	out->maxy = MIN2(out->maxy, clip->maxy);
130 }
131 
r600_scissor_make_union(struct r600_signed_scissor * out,struct r600_signed_scissor * in)132 static void r600_scissor_make_union(struct r600_signed_scissor *out,
133 				    struct r600_signed_scissor *in)
134 {
135 	out->minx = MIN2(out->minx, in->minx);
136 	out->miny = MIN2(out->miny, in->miny);
137 	out->maxx = MAX2(out->maxx, in->maxx);
138 	out->maxy = MAX2(out->maxy, in->maxy);
139 }
140 
evergreen_apply_scissor_bug_workaround(struct r600_common_context * rctx,struct pipe_scissor_state * scissor)141 void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
142 					    struct pipe_scissor_state *scissor)
143 {
144 	if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
145 		if (scissor->maxx == 0)
146 			scissor->minx = 1;
147 		if (scissor->maxy == 0)
148 			scissor->miny = 1;
149 
150 		if (rctx->chip_class == CAYMAN &&
151 		    scissor->maxx == 1 && scissor->maxy == 1)
152 			scissor->maxx = 2;
153 	}
154 }
155 
r600_emit_one_scissor(struct r600_common_context * rctx,struct radeon_winsys_cs * cs,struct r600_signed_scissor * vp_scissor,struct pipe_scissor_state * scissor)156 static void r600_emit_one_scissor(struct r600_common_context *rctx,
157 				  struct radeon_winsys_cs *cs,
158 				  struct r600_signed_scissor *vp_scissor,
159 				  struct pipe_scissor_state *scissor)
160 {
161 	struct pipe_scissor_state final;
162 
163 	if (rctx->vs_disables_clipping_viewport) {
164 		final.minx = final.miny = 0;
165 		final.maxx = final.maxy = GET_MAX_SCISSOR(rctx);
166 	} else {
167 		r600_clamp_scissor(rctx, &final, vp_scissor);
168 	}
169 
170 	if (scissor)
171 		r600_clip_scissor(&final, scissor);
172 
173 	evergreen_apply_scissor_bug_workaround(rctx, &final);
174 
175 	radeon_emit(cs, S_028250_TL_X(final.minx) |
176 			S_028250_TL_Y(final.miny) |
177 			S_028250_WINDOW_OFFSET_DISABLE(1));
178 	radeon_emit(cs, S_028254_BR_X(final.maxx) |
179 			S_028254_BR_Y(final.maxy));
180 }
181 
/* Supported viewport coordinate range is [-MAX, MAX]; MAX is 32768 when the
 * context's chip_class is EVERGREEN or later, 16384 otherwise. The argument
 * is parenthesised so that any pointer expression can be passed. */
#define GET_MAX_VIEWPORT_RANGE(rctx) ((rctx)->chip_class >= EVERGREEN ? 32768 : 16384)
184 
r600_emit_guardband(struct r600_common_context * rctx,struct r600_signed_scissor * vp_as_scissor)185 static void r600_emit_guardband(struct r600_common_context *rctx,
186 				struct r600_signed_scissor *vp_as_scissor)
187 {
188 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
189 	struct pipe_viewport_state vp;
190 	float left, top, right, bottom, max_range, guardband_x, guardband_y;
191 
192 	/* Reconstruct the viewport transformation from the scissor. */
193 	vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
194 	vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
195 	vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
196 	vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
197 
198 	/* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
199 	if (vp_as_scissor->minx == vp_as_scissor->maxx)
200 		vp.scale[0] = 0.5;
201 	if (vp_as_scissor->miny == vp_as_scissor->maxy)
202 		vp.scale[1] = 0.5;
203 
204 	/* Find the biggest guard band that is inside the supported viewport
205 	 * range. The guard band is specified as a horizontal and vertical
206 	 * distance from (0,0) in clip space.
207 	 *
208 	 * This is done by applying the inverse viewport transformation
209 	 * on the viewport limits to get those limits in clip space.
210 	 *
211 	 * Use a limit one pixel smaller to allow for some precision error.
212 	 */
213 	max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
214 	left   = (-max_range - vp.translate[0]) / vp.scale[0];
215 	right  = ( max_range - vp.translate[0]) / vp.scale[0];
216 	top    = (-max_range - vp.translate[1]) / vp.scale[1];
217 	bottom = ( max_range - vp.translate[1]) / vp.scale[1];
218 
219 	assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
220 
221 	guardband_x = MIN2(-left, right);
222 	guardband_y = MIN2(-top, bottom);
223 
224 	/* If any of the GB registers is updated, all of them must be updated. */
225 	if (rctx->chip_class >= CAYMAN)
226 		radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
227 	else
228 		radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
229 
230 	radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
231 	radeon_emit(cs, fui(1.0));         /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
232 	radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
233 	radeon_emit(cs, fui(1.0));         /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
234 }
235 
r600_emit_scissors(struct r600_common_context * rctx,struct r600_atom * atom)236 static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
237 {
238 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
239 	struct pipe_scissor_state *states = rctx->scissors.states;
240 	unsigned mask = rctx->scissors.dirty_mask;
241 	bool scissor_enabled = rctx->scissor_enabled;
242 	struct r600_signed_scissor max_vp_scissor;
243 	int i;
244 
245 	/* The simple case: Only 1 viewport is active. */
246 	if (!rctx->vs_writes_viewport_index) {
247 		struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
248 
249 		if (!(mask & 1))
250 			return;
251 
252 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
253 		r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
254 		r600_emit_guardband(rctx, vp);
255 		rctx->scissors.dirty_mask &= ~1; /* clear one bit */
256 		return;
257 	}
258 
259 	/* Shaders can draw to any viewport. Make a union of all viewports. */
260 	max_vp_scissor = rctx->viewports.as_scissor[0];
261 	for (i = 1; i < R600_MAX_VIEWPORTS; i++)
262 		r600_scissor_make_union(&max_vp_scissor,
263 				      &rctx->viewports.as_scissor[i]);
264 
265 	while (mask) {
266 		int start, count, i;
267 
268 		u_bit_scan_consecutive_range(&mask, &start, &count);
269 
270 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
271 					       start * 4 * 2, count * 2);
272 		for (i = start; i < start+count; i++) {
273 			r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
274 					      scissor_enabled ? &states[i] : NULL);
275 		}
276 	}
277 	r600_emit_guardband(rctx, &max_vp_scissor);
278 	rctx->scissors.dirty_mask = 0;
279 }
280 
r600_set_viewport_states(struct pipe_context * ctx,unsigned start_slot,unsigned num_viewports,const struct pipe_viewport_state * state)281 static void r600_set_viewport_states(struct pipe_context *ctx,
282 				     unsigned start_slot,
283 				     unsigned num_viewports,
284 				     const struct pipe_viewport_state *state)
285 {
286 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
287 	unsigned mask;
288 	int i;
289 
290 	for (i = 0; i < num_viewports; i++) {
291 		unsigned index = start_slot + i;
292 
293 		rctx->viewports.states[index] = state[i];
294 		r600_get_scissor_from_viewport(rctx, &state[i],
295 					       &rctx->viewports.as_scissor[index]);
296 	}
297 
298 	mask = ((1 << num_viewports) - 1) << start_slot;
299 	rctx->viewports.dirty_mask |= mask;
300 	rctx->viewports.depth_range_dirty_mask |= mask;
301 	rctx->scissors.dirty_mask |= mask;
302 	rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
303 	rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
304 }
305 
r600_emit_one_viewport(struct r600_common_context * rctx,struct pipe_viewport_state * state)306 static void r600_emit_one_viewport(struct r600_common_context *rctx,
307 				   struct pipe_viewport_state *state)
308 {
309 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
310 
311 	radeon_emit(cs, fui(state->scale[0]));
312 	radeon_emit(cs, fui(state->translate[0]));
313 	radeon_emit(cs, fui(state->scale[1]));
314 	radeon_emit(cs, fui(state->translate[1]));
315 	radeon_emit(cs, fui(state->scale[2]));
316 	radeon_emit(cs, fui(state->translate[2]));
317 }
318 
r600_emit_viewports(struct r600_common_context * rctx)319 static void r600_emit_viewports(struct r600_common_context *rctx)
320 {
321 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
322 	struct pipe_viewport_state *states = rctx->viewports.states;
323 	unsigned mask = rctx->viewports.dirty_mask;
324 
325 	/* The simple case: Only 1 viewport is active. */
326 	if (!rctx->vs_writes_viewport_index) {
327 		if (!(mask & 1))
328 			return;
329 
330 		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
331 		r600_emit_one_viewport(rctx, &states[0]);
332 		rctx->viewports.dirty_mask &= ~1; /* clear one bit */
333 		return;
334 	}
335 
336 	while (mask) {
337 		int start, count, i;
338 
339 		u_bit_scan_consecutive_range(&mask, &start, &count);
340 
341 		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
342 					       start * 4 * 6, count * 6);
343 		for (i = start; i < start+count; i++)
344 			r600_emit_one_viewport(rctx, &states[i]);
345 	}
346 	rctx->viewports.dirty_mask = 0;
347 }
348 
r600_emit_depth_ranges(struct r600_common_context * rctx)349 static void r600_emit_depth_ranges(struct r600_common_context *rctx)
350 {
351 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
352 	struct pipe_viewport_state *states = rctx->viewports.states;
353 	unsigned mask = rctx->viewports.depth_range_dirty_mask;
354 	float zmin, zmax;
355 
356 	/* The simple case: Only 1 viewport is active. */
357 	if (!rctx->vs_writes_viewport_index) {
358 		if (!(mask & 1))
359 			return;
360 
361 		util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax);
362 
363 		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
364 		radeon_emit(cs, fui(zmin));
365 		radeon_emit(cs, fui(zmax));
366 		rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
367 		return;
368 	}
369 
370 	while (mask) {
371 		int start, count, i;
372 
373 		u_bit_scan_consecutive_range(&mask, &start, &count);
374 
375 		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
376 					   start * 4 * 2, count * 2);
377 		for (i = start; i < start+count; i++) {
378 			util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax);
379 			radeon_emit(cs, fui(zmin));
380 			radeon_emit(cs, fui(zmax));
381 		}
382 	}
383 	rctx->viewports.depth_range_dirty_mask = 0;
384 }
385 
/* Emit callback of the viewports atom: viewport transforms first, then
 * depth ranges. "atom" is unused. */
static void r600_emit_viewport_states(struct r600_common_context *rctx,
				      struct r600_atom *atom)
{
	/* Each helper checks and clears its own dirty mask. */
	r600_emit_viewports(rctx);

	r600_emit_depth_ranges(rctx);
}
392 
393 /* Set viewport dependencies on pipe_rasterizer_state. */
r600_viewport_set_rast_deps(struct r600_common_context * rctx,bool scissor_enable,bool clip_halfz)394 void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
395 				 bool scissor_enable, bool clip_halfz)
396 {
397 	if (rctx->scissor_enabled != scissor_enable) {
398 		rctx->scissor_enabled = scissor_enable;
399 		rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
400 		rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
401 	}
402 	if (rctx->clip_halfz != clip_halfz) {
403 		rctx->clip_halfz = clip_halfz;
404 		rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
405 		rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
406 	}
407 }
408 
409 /**
410  * Normally, we only emit 1 viewport and 1 scissor if no shader is using
411  * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
412  * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
413  * called to emit the rest.
414  */
r600_update_vs_writes_viewport_index(struct r600_common_context * rctx,struct tgsi_shader_info * info)415 void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
416 					  struct tgsi_shader_info *info)
417 {
418 	bool vs_window_space;
419 
420 	if (!info)
421 		return;
422 
423 	/* When the VS disables clipping and viewport transformation. */
424 	vs_window_space =
425 		info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
426 
427 	if (rctx->vs_disables_clipping_viewport != vs_window_space) {
428 		rctx->vs_disables_clipping_viewport = vs_window_space;
429 		rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
430 		rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
431 	}
432 
433 	/* Viewport index handling. */
434 	rctx->vs_writes_viewport_index = info->writes_viewport_index;
435 	if (!rctx->vs_writes_viewport_index)
436 		return;
437 
438 	if (rctx->scissors.dirty_mask)
439 	    rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
440 
441 	if (rctx->viewports.dirty_mask ||
442 	    rctx->viewports.depth_range_dirty_mask)
443 	    rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
444 }
445 
r600_init_viewport_functions(struct r600_common_context * rctx)446 void r600_init_viewport_functions(struct r600_common_context *rctx)
447 {
448 	rctx->scissors.atom.emit = r600_emit_scissors;
449 	rctx->viewports.atom.emit = r600_emit_viewport_states;
450 
451 	rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
452 	rctx->viewports.atom.num_dw = 2 + 16 * 6;
453 
454 	rctx->b.set_scissor_states = r600_set_scissor_states;
455 	rctx->b.set_viewport_states = r600_set_viewport_states;
456 }
457