1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Christian König <christian.koenig@amd.com>
25  */
26 
27 #include "radeonsi_pipe.h"
28 #include "si_state.h"
29 
30 /*
31  * Stream out
32  */
33 
34 #if 0
/*
 * Begin transform feedback (streamout): enable the bound buffers and
 * program their sizes, strides and starting offsets.
 *
 * NOTE(review): this whole function is compiled out by the enclosing
 * #if 0 — it is r600g-era reference code kept while the SI backend is
 * brought up.  The per-target packet emission is additionally #if 0'd.
 */
void si_context_streamout_begin(struct r600_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	struct si_so_target **t = ctx->so_targets;
	unsigned *strides = ctx->vs_shader_so_strides;
	unsigned buffer_en, i;

	/* Bitmask of enabled streamout buffers: bit i set iff target i is bound. */
	buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
		    (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
		    (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
		    (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);

	/* Dword budget for the matching streamout_end, reserved up front so a
	 * CS flush cannot land between begin and end. */
	ctx->num_cs_dw_streamout_end =
		12 + /* flush_vgt_streamout */
		util_bitcount(buffer_en) * 8 +
		3;

	/* Reserve space for everything emitted below plus the end sequence:
	 * appended buffers need 8 dwords (update-from-mem + reloc), fresh
	 * buffers need 6 (update-from-packet). */
	si_need_cs_space(ctx,
			   12 + /* flush_vgt_streamout */
			   6 + /* enables */
			   util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
			   util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
			   ctx->num_cs_dw_streamout_end, TRUE);

	/* NOTE(review): the CAYMAN check and evergreen_* helpers are leftovers
	 * from r600g — confirm they are appropriate for SI before enabling. */
	if (ctx->chip_class >= CAYMAN) {
		evergreen_flush_vgt_streamout(ctx);
		evergreen_set_streamout_enable(ctx, buffer_en);
	}

	for (i = 0; i < ctx->num_so_targets; i++) {
#if 0
		if (t[i]) {
			t[i]->stride = strides[i];
			t[i]->so_index = i;

			/* Program buffer size, vertex stride and base for target i.
			 * Registers are 16 bytes apart per buffer, hence 16*i. */
			cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
			cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
							16*i - SI_CONTEXT_REG_OFFSET) >> 2;
			cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
							t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
			cs->buf[cs->cdw++] = strides[i] >> 2;		   /* VTX_STRIDE (in DW) */
			cs->buf[cs->cdw++] = 0;			   /* BUFFER_BASE */

			/* Relocation so the kernel patches in the real buffer address. */
			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
			cs->buf[cs->cdw++] =
				si_context_bo_reloc(ctx, si_resource(t[i]->b.buffer),
						      RADEON_USAGE_WRITE);

			if (ctx->streamout_append_bitmask & (1 << i)) {
				/* Append. */
				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = 0; /* src address lo */
				cs->buf[cs->cdw++] = 0; /* src address hi */

				/* Reloc for the filled_size buffer the offset is read from. */
				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
				cs->buf[cs->cdw++] =
					si_context_bo_reloc(ctx,  t[i]->filled_size,
							      RADEON_USAGE_READ);
			} else {
				/* Start from the beginning. */
				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
				cs->buf[cs->cdw++] = 0; /* unused */
			}
		}
#endif
	}
}
111 
/*
 * End transform feedback: flush the VGT, store the final filled sizes
 * back to memory, and disable streamout.
 *
 * NOTE(review): dead code under the enclosing #if 0; the per-target
 * packet emission is additionally #if 0'd, so flush_flags always stays
 * 0 as written.
 */
void si_context_streamout_end(struct r600_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	struct si_so_target **t = ctx->so_targets;
	unsigned i, flush_flags = 0;

	evergreen_flush_vgt_streamout(ctx);

	for (i = 0; i < ctx->num_so_targets; i++) {
#if 0
		if (t[i]) {
			/* Ask the CP to write BUFFER_FILLED_SIZE of buffer i to memory. */
			cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
			cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
						       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
						       STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
			cs->buf[cs->cdw++] = 0; /* dst address lo */
			cs->buf[cs->cdw++] = 0; /* dst address hi */
			cs->buf[cs->cdw++] = 0; /* unused */
			cs->buf[cs->cdw++] = 0; /* unused */

			/* Reloc for the filled_size buffer the CP writes into. */
			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
			cs->buf[cs->cdw++] =
				si_context_bo_reloc(ctx,  t[i]->filled_size,
						      RADEON_USAGE_WRITE);

			flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
		}
#endif
	}

	evergreen_set_streamout_enable(ctx, 0);

	/* Make later reads see what streamout wrote. */
	ctx->atom_surface_sync.flush_flags |= flush_flags;
	si_atom_dirty(ctx, &ctx->atom_surface_sync.atom);

	ctx->num_cs_dw_streamout_end = 0;

	/* XXX print some debug info */
	/* NOTE(review): debugging leftover — synchronously maps each
	 * filled_size buffer and prints it; remove before enabling. */
	for (i = 0; i < ctx->num_so_targets; i++) {
		if (!t[i])
			continue;

		uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
		printf("FILLED_SIZE%i: %u\n", i, *ptr);
		ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
	}
}
159 
160 void evergreen_flush_vgt_streamout(struct si_context *ctx)
161 {
162 	struct radeon_winsys_cs *cs = ctx->cs;
163 
164 	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
165 	cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
166 	cs->buf[cs->cdw++] = 0;
167 
168 	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
169 	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
170 
171 	cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
172 	cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
173 	cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
174 	cs->buf[cs->cdw++] = 0;
175 	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
176 	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
177 	cs->buf[cs->cdw++] = 4; /* poll interval */
178 }
179 
180 void evergreen_set_streamout_enable(struct si_context *ctx, unsigned buffer_enable_bit)
181 {
182 	struct radeon_winsys_cs *cs = ctx->cs;
183 
184 	if (buffer_enable_bit) {
185 		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
186 		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
187 		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
188 
189 		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
190 		cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
191 		cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
192 	} else {
193 		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
194 		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
195 		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
196 	}
197 }
198 
199 #endif
200 
201 struct pipe_stream_output_target *
si_create_so_target(struct pipe_context * ctx,struct pipe_resource * buffer,unsigned buffer_offset,unsigned buffer_size)202 si_create_so_target(struct pipe_context *ctx,
203 		    struct pipe_resource *buffer,
204 		    unsigned buffer_offset,
205 		    unsigned buffer_size)
206 {
207 #if 0
208 	struct si_context *rctx = (struct r600_context *)ctx;
209 	struct si_so_target *t;
210 	void *ptr;
211 
212 	t = CALLOC_STRUCT(si_so_target);
213 	if (!t) {
214 		return NULL;
215 	}
216 
217 	t->b.reference.count = 1;
218 	t->b.context = ctx;
219 	pipe_resource_reference(&t->b.buffer, buffer);
220 	t->b.buffer_offset = buffer_offset;
221 	t->b.buffer_size = buffer_size;
222 
223 	t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4);
224 	ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
225 	memset(ptr, 0, t->filled_size->buf->size);
226 	rctx->ws->buffer_unmap(t->filled_size->cs_buf);
227 
228 	return &t->b;
229 #endif
230 	return NULL;
231 }
232 
/*
 * Destroy a stream-output target created by si_create_so_target,
 * releasing its buffer references and freeing the wrapper.
 *
 * The body is kept under #if 0 alongside the disabled create path, so
 * this is currently a no-op.
 */
void si_so_target_destroy(struct pipe_context *ctx,
			  struct pipe_stream_output_target *target)
{
#if 0
	/* Fixed: cast must match the declared type (was (struct r600_so_target*)
	 * assigned to a struct si_so_target *). */
	struct si_so_target *t = (struct si_so_target *)target;
	pipe_resource_reference(&t->b.buffer, NULL);
	si_resource_reference(&t->filled_size, NULL);
	FREE(t);
#endif
}
243 
/*
 * Bind a new set of stream-output targets.
 *
 * Streamout is not implemented on SI yet, so only num_targets == 0
 * (unbinding) is accepted; the eventual implementation is kept under
 * #if 0 for reference.
 */
void si_set_so_targets(struct pipe_context *ctx,
		       unsigned num_targets,
		       struct pipe_stream_output_target **targets,
		       unsigned append_bitmask)
{
	assert(num_targets == 0);
#if 0
	/* Fixed: cast must match the declared type (was (struct r600_context *)
	 * assigned to a struct si_context *). */
	struct si_context *rctx = (struct si_context *)ctx;
	unsigned i;

	/* Stop streamout. */
	if (rctx->num_so_targets) {
		si_context_streamout_end(rctx);
	}

	/* Set the new targets. */
	for (i = 0; i < num_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
	}
	/* Drop references to any previously bound targets beyond the new count. */
	for (; i < rctx->num_so_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
	}

	rctx->num_so_targets = num_targets;
	rctx->streamout_start = num_targets != 0;
	rctx->streamout_append_bitmask = append_bitmask;
#endif
}
272