1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors: Marek Olšák <maraeo@gmail.com>
24  *
25  */
26 
27 #include "r600_cs.h"
28 
29 /* 2xMSAA
30  * There are two locations (4, 4), (-4, -4). */
31 const uint32_t eg_sample_locs_2x[4] = {
32 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
33 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
34 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
35 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
36 };
37 const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
 * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
40 const uint32_t eg_sample_locs_4x[4] = {
41 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
42 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
43 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
44 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
45 };
46 const unsigned eg_max_dist_4x = 6;
47 
48 /* Cayman 8xMSAA */
49 static const uint32_t cm_sample_locs_8x[] = {
50 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
51 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
52 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
53 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
54 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
55 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
56 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
57 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
58 };
59 static const unsigned cm_max_dist_8x = 8;
60 /* Cayman 16xMSAA */
61 static const uint32_t cm_sample_locs_16x[] = {
62 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
63 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
64 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
65 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
66 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
67 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
68 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
69 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
70 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
71 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
72 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
73 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
74 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
75 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
76 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
77 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
78 };
79 static const unsigned cm_max_dist_16x = 8;
80 
cayman_get_sample_position(struct pipe_context * ctx,unsigned sample_count,unsigned sample_index,float * out_value)81 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
82 				unsigned sample_index, float *out_value)
83 {
84 	int offset, index;
85 	struct {
86 		int idx:4;
87 	} val;
88 	switch (sample_count) {
89 	case 1:
90 	default:
91 		out_value[0] = out_value[1] = 0.5;
92 		break;
93 	case 2:
94 		offset = 4 * (sample_index * 2);
95 		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
96 		out_value[0] = (float)(val.idx + 8) / 16.0f;
97 		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
98 		out_value[1] = (float)(val.idx + 8) / 16.0f;
99 		break;
100 	case 4:
101 		offset = 4 * (sample_index * 2);
102 		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
103 		out_value[0] = (float)(val.idx + 8) / 16.0f;
104 		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
105 		out_value[1] = (float)(val.idx + 8) / 16.0f;
106 		break;
107 	case 8:
108 		offset = 4 * (sample_index % 4 * 2);
109 		index = (sample_index / 4) * 4;
110 		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
111 		out_value[0] = (float)(val.idx + 8) / 16.0f;
112 		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
113 		out_value[1] = (float)(val.idx + 8) / 16.0f;
114 		break;
115 	case 16:
116 		offset = 4 * (sample_index % 4 * 2);
117 		index = (sample_index / 4) * 4;
118 		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
119 		out_value[0] = (float)(val.idx + 8) / 16.0f;
120 		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
121 		out_value[1] = (float)(val.idx + 8) / 16.0f;
122 		break;
123 	}
124 }
125 
cayman_init_msaa(struct pipe_context * ctx)126 void cayman_init_msaa(struct pipe_context *ctx)
127 {
128 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
129 	int i;
130 
131 	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
132 
133 	for (i = 0; i < 2; i++)
134 		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
135 	for (i = 0; i < 4; i++)
136 		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
137 	for (i = 0; i < 8; i++)
138 		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
139 	for (i = 0; i < 16; i++)
140 		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
141 }
142 
cayman_emit_msaa_sample_locs(struct radeon_winsys_cs * cs,int nr_samples)143 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
144 {
145 	switch (nr_samples) {
146 	default:
147 	case 1:
148 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
149 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
150 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
151 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
152 		break;
153 	case 2:
154 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
155 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
156 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
157 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
158 		break;
159 	case 4:
160 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
161 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
162 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
163 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
164 		break;
165 	case 8:
166 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
167 		radeon_emit(cs, cm_sample_locs_8x[0]);
168 		radeon_emit(cs, cm_sample_locs_8x[4]);
169 		radeon_emit(cs, 0);
170 		radeon_emit(cs, 0);
171 		radeon_emit(cs, cm_sample_locs_8x[1]);
172 		radeon_emit(cs, cm_sample_locs_8x[5]);
173 		radeon_emit(cs, 0);
174 		radeon_emit(cs, 0);
175 		radeon_emit(cs, cm_sample_locs_8x[2]);
176 		radeon_emit(cs, cm_sample_locs_8x[6]);
177 		radeon_emit(cs, 0);
178 		radeon_emit(cs, 0);
179 		radeon_emit(cs, cm_sample_locs_8x[3]);
180 		radeon_emit(cs, cm_sample_locs_8x[7]);
181 		break;
182 	case 16:
183 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
184 		radeon_emit(cs, cm_sample_locs_16x[0]);
185 		radeon_emit(cs, cm_sample_locs_16x[4]);
186 		radeon_emit(cs, cm_sample_locs_16x[8]);
187 		radeon_emit(cs, cm_sample_locs_16x[12]);
188 		radeon_emit(cs, cm_sample_locs_16x[1]);
189 		radeon_emit(cs, cm_sample_locs_16x[5]);
190 		radeon_emit(cs, cm_sample_locs_16x[9]);
191 		radeon_emit(cs, cm_sample_locs_16x[13]);
192 		radeon_emit(cs, cm_sample_locs_16x[2]);
193 		radeon_emit(cs, cm_sample_locs_16x[6]);
194 		radeon_emit(cs, cm_sample_locs_16x[10]);
195 		radeon_emit(cs, cm_sample_locs_16x[14]);
196 		radeon_emit(cs, cm_sample_locs_16x[3]);
197 		radeon_emit(cs, cm_sample_locs_16x[7]);
198 		radeon_emit(cs, cm_sample_locs_16x[11]);
199 		radeon_emit(cs, cm_sample_locs_16x[15]);
200 		break;
201 	}
202 }
203 
cayman_emit_msaa_config(struct radeon_winsys_cs * cs,int nr_samples,int ps_iter_samples,int overrast_samples,unsigned sc_mode_cntl_1)204 void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
205 			     int ps_iter_samples, int overrast_samples,
206 			     unsigned sc_mode_cntl_1)
207 {
208 	int setup_samples = nr_samples > 1 ? nr_samples :
209 			    overrast_samples > 1 ? overrast_samples : 0;
210 	/* Required by OpenGL line rasterization.
211 	 *
212 	 * TODO: We should also enable perpendicular endcaps for AA lines,
213 	 *       but that requires implementing line stippling in the pixel
214 	 *       shader. SC can only do line stippling with axis-aligned
215 	 *       endcaps.
216 	 */
217 	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
218 
219 	if (setup_samples > 1) {
220 		/* indexed by log2(nr_samples) */
221 		unsigned max_dist[] = {
222 			0,
223 			eg_max_dist_2x,
224 			eg_max_dist_4x,
225 			cm_max_dist_8x,
226 			cm_max_dist_16x
227 		};
228 		unsigned log_samples = util_logbase2(setup_samples);
229 		unsigned log_ps_iter_samples =
230 			util_logbase2(util_next_power_of_two(ps_iter_samples));
231 
232 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
233 		radeon_emit(cs, sc_line_cntl |
234 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
235 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
236 			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
237 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
238 
239 		if (nr_samples > 1) {
240 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
241 					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
242 					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
243 					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
244 					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
245 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
246 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
247 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
248 					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
249 					       sc_mode_cntl_1);
250 		} else if (overrast_samples > 1) {
251 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
252 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
253 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
254 					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
255 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
256 					       sc_mode_cntl_1);
257 		}
258 	} else {
259 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
260 		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
261 		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
262 
263 		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
264 				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
265 				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
266 		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
267 				       sc_mode_cntl_1);
268 	}
269 }
270