1 /*
2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_memory.h"
29 #include "util/u_inlines.h"
30 
31 #include "freedreno_query_hw.h"
32 #include "freedreno_context.h"
33 #include "freedreno_resource.h"
34 #include "freedreno_util.h"
35 
36 struct fd_hw_sample_period {
37 	struct fd_hw_sample *start, *end;
38 	struct list_head list;
39 };
40 
41 static struct fd_hw_sample *
get_sample(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned query_type)42 get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
43 		unsigned query_type)
44 {
45 	struct fd_context *ctx = batch->ctx;
46 	struct fd_hw_sample *samp = NULL;
47 	int idx = pidx(query_type);
48 
49 	assume(idx >= 0);   /* query never would have been created otherwise */
50 
51 	if (!batch->sample_cache[idx]) {
52 		struct fd_hw_sample *new_samp =
53 			ctx->hw_sample_providers[idx]->get_sample(batch, ring);
54 		fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
55 		util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
56 		batch->needs_flush = true;
57 	}
58 
59 	fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
60 
61 	return samp;
62 }
63 
64 static void
clear_sample_cache(struct fd_batch * batch)65 clear_sample_cache(struct fd_batch *batch)
66 {
67 	int i;
68 
69 	for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
70 		fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
71 }
72 
73 static bool
is_active(struct fd_hw_query * hq,enum fd_render_stage stage)74 is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
75 {
76 	return !!(hq->provider->active & stage);
77 }
78 
79 
80 static void
resume_query(struct fd_batch * batch,struct fd_hw_query * hq,struct fd_ringbuffer * ring)81 resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
82 		struct fd_ringbuffer *ring)
83 {
84 	int idx = pidx(hq->provider->query_type);
85 	DBG("%p", hq);
86 	assert(idx >= 0);   /* query never would have been created otherwise */
87 	assert(!hq->period);
88 	batch->active_providers |= (1 << idx);
89 	hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
90 	list_inithead(&hq->period->list);
91 	hq->period->start = get_sample(batch, ring, hq->base.type);
92 	/* NOTE: slab_alloc_st() does not zero out the buffer: */
93 	hq->period->end = NULL;
94 }
95 
96 static void
pause_query(struct fd_batch * batch,struct fd_hw_query * hq,struct fd_ringbuffer * ring)97 pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
98 		struct fd_ringbuffer *ring)
99 {
100 	int idx = pidx(hq->provider->query_type);
101 	DBG("%p", hq);
102 	assert(idx >= 0);   /* query never would have been created otherwise */
103 	assert(hq->period && !hq->period->end);
104 	assert(batch->active_providers & (1 << idx));
105 	hq->period->end = get_sample(batch, ring, hq->base.type);
106 	list_addtail(&hq->period->list, &hq->periods);
107 	hq->period = NULL;
108 }
109 
110 static void
destroy_periods(struct fd_context * ctx,struct fd_hw_query * hq)111 destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
112 {
113 	struct fd_hw_sample_period *period, *s;
114 	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
115 		fd_hw_sample_reference(ctx, &period->start, NULL);
116 		fd_hw_sample_reference(ctx, &period->end, NULL);
117 		list_del(&period->list);
118 		slab_free_st(&ctx->sample_period_pool, period);
119 	}
120 }
121 
122 static void
fd_hw_destroy_query(struct fd_context * ctx,struct fd_query * q)123 fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
124 {
125 	struct fd_hw_query *hq = fd_hw_query(q);
126 
127 	DBG("%p", q);
128 
129 	destroy_periods(ctx, hq);
130 	list_del(&hq->list);
131 
132 	free(hq);
133 }
134 
135 static void
fd_hw_begin_query(struct fd_context * ctx,struct fd_query * q)136 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
137 {
138 	struct fd_batch *batch = fd_context_batch(ctx);
139 	struct fd_hw_query *hq = fd_hw_query(q);
140 
141 	DBG("%p", q);
142 
143 	/* begin_query() should clear previous results: */
144 	destroy_periods(ctx, hq);
145 
146 	if (batch && is_active(hq, batch->stage))
147 		resume_query(batch, hq, batch->draw);
148 
149 	/* add to active list: */
150 	assert(list_is_empty(&hq->list));
151 	list_addtail(&hq->list, &ctx->hw_active_queries);
152 }
153 
154 static void
fd_hw_end_query(struct fd_context * ctx,struct fd_query * q)155 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
156 {
157 	struct fd_batch *batch = fd_context_batch(ctx);
158 	struct fd_hw_query *hq = fd_hw_query(q);
159 
160 	DBG("%p", q);
161 
162 	if (batch && is_active(hq, batch->stage))
163 		pause_query(batch, hq, batch->draw);
164 
165 	/* remove from active list: */
166 	list_delinit(&hq->list);
167 }
168 
169 /* helper to get ptr to specified sample: */
sampptr(struct fd_hw_sample * samp,uint32_t n,void * ptr)170 static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
171 {
172 	return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
173 }
174 
175 static bool
fd_hw_get_query_result(struct fd_context * ctx,struct fd_query * q,bool wait,union pipe_query_result * result)176 fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
177 		bool wait, union pipe_query_result *result)
178 {
179 	struct fd_hw_query *hq = fd_hw_query(q);
180 	const struct fd_hw_sample_provider *p = hq->provider;
181 	struct fd_hw_sample_period *period;
182 
183 	DBG("%p: wait=%d", q, wait);
184 
185 	if (list_is_empty(&hq->periods))
186 		return true;
187 
188 	assert(list_is_empty(&hq->list));
189 	assert(!hq->period);
190 
191 	/* if !wait, then check the last sample (the one most likely to
192 	 * not be ready yet) and bail if it is not ready:
193 	 */
194 	if (!wait) {
195 		int ret;
196 
197 		period = LIST_ENTRY(struct fd_hw_sample_period,
198 				hq->periods.prev, list);
199 
200 		struct fd_resource *rsc = fd_resource(period->end->prsc);
201 
202 		if (pending(rsc, false)) {
203 			/* piglit spec@arb_occlusion_query@occlusion_query_conform
204 			 * test, and silly apps perhaps, get stuck in a loop trying
205 			 * to get  query result forever with wait==false..  we don't
206 			 * wait to flush unnecessarily but we also don't want to
207 			 * spin forever:
208 			 */
209 			if (hq->no_wait_cnt++ > 5)
210 				fd_batch_flush(rsc->write_batch);
211 			return false;
212 		}
213 
214 		if (!rsc->bo)
215 			return false;
216 
217 		ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe,
218 				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
219 		if (ret)
220 			return false;
221 
222 		fd_bo_cpu_fini(rsc->bo);
223 	}
224 
225 	/* sum the result across all sample periods: */
226 	LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
227 		struct fd_hw_sample *start = period->start;
228 		struct fd_hw_sample *end = period->end;
229 		unsigned i;
230 
231 		/* start and end samples should be from same batch: */
232 		assert(start->prsc == end->prsc);
233 		assert(start->num_tiles == end->num_tiles);
234 
235 		struct fd_resource *rsc = fd_resource(start->prsc);
236 
237 		if (rsc->write_batch)
238 			fd_batch_flush(rsc->write_batch);
239 
240 		/* some piglit tests at least do query with no draws, I guess: */
241 		if (!rsc->bo)
242 			continue;
243 
244 		fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
245 
246 		void *ptr = fd_bo_map(rsc->bo);
247 
248 		for (i = 0; i < start->num_tiles; i++) {
249 			p->accumulate_result(ctx, sampptr(period->start, i, ptr),
250 					sampptr(period->end, i, ptr), result);
251 		}
252 
253 		fd_bo_cpu_fini(rsc->bo);
254 	}
255 
256 	return true;
257 }
258 
259 static const struct fd_query_funcs hw_query_funcs = {
260 		.destroy_query    = fd_hw_destroy_query,
261 		.begin_query      = fd_hw_begin_query,
262 		.end_query        = fd_hw_end_query,
263 		.get_query_result = fd_hw_get_query_result,
264 };
265 
266 struct fd_query *
fd_hw_create_query(struct fd_context * ctx,unsigned query_type,unsigned index)267 fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
268 {
269 	struct fd_hw_query *hq;
270 	struct fd_query *q;
271 	int idx = pidx(query_type);
272 
273 	if ((idx < 0) || !ctx->hw_sample_providers[idx])
274 		return NULL;
275 
276 	hq = CALLOC_STRUCT(fd_hw_query);
277 	if (!hq)
278 		return NULL;
279 
280 	DBG("%p: query_type=%u", hq, query_type);
281 
282 	hq->provider = ctx->hw_sample_providers[idx];
283 
284 	list_inithead(&hq->periods);
285 	list_inithead(&hq->list);
286 
287 	q = &hq->base;
288 	q->funcs = &hw_query_funcs;
289 	q->type = query_type;
290 	q->index = index;
291 
292 	return q;
293 }
294 
295 struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch * batch,uint32_t size)296 fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
297 {
298 	struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
299 	pipe_reference_init(&samp->reference, 1);
300 	samp->size = size;
301 	debug_assert(util_is_power_of_two_or_zero(size));
302 	batch->next_sample_offset = align(batch->next_sample_offset, size);
303 	samp->offset = batch->next_sample_offset;
304 	/* NOTE: slab_alloc_st() does not zero out the buffer: */
305 	samp->prsc = NULL;
306 	samp->num_tiles = 0;
307 	samp->tile_stride = 0;
308 	batch->next_sample_offset += size;
309 
310 	if (!batch->query_buf) {
311 		struct pipe_screen *pscreen = &batch->ctx->screen->base;
312 		struct pipe_resource templ = {
313 			.target  = PIPE_BUFFER,
314 			.format  = PIPE_FORMAT_R8_UNORM,
315 			.bind    = PIPE_BIND_QUERY_BUFFER,
316 			.width0  = 0,    /* create initially zero size buffer */
317 			.height0 = 1,
318 			.depth0  = 1,
319 			.array_size = 1,
320 			.last_level = 0,
321 			.nr_samples = 1,
322 		};
323 		batch->query_buf = pscreen->resource_create(pscreen, &templ);
324 	}
325 
326 	pipe_resource_reference(&samp->prsc, batch->query_buf);
327 
328 	return samp;
329 }
330 
331 void
__fd_hw_sample_destroy(struct fd_context * ctx,struct fd_hw_sample * samp)332 __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
333 {
334 	pipe_resource_reference(&samp->prsc, NULL);
335 	slab_free_st(&ctx->sample_pool, samp);
336 }
337 
338 /* called from gmem code once total storage requirements are known (ie.
339  * number of samples times number of tiles)
340  */
341 void
fd_hw_query_prepare(struct fd_batch * batch,uint32_t num_tiles)342 fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
343 {
344 	uint32_t tile_stride = batch->next_sample_offset;
345 
346 	if (tile_stride > 0)
347 		fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
348 
349 	batch->query_tile_stride = tile_stride;
350 
351 	while (batch->samples.size > 0) {
352 		struct fd_hw_sample *samp =
353 			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
354 		samp->num_tiles = num_tiles;
355 		samp->tile_stride = tile_stride;
356 		fd_hw_sample_reference(batch->ctx, &samp, NULL);
357 	}
358 
359 	/* reset things for next batch: */
360 	batch->next_sample_offset = 0;
361 }
362 
363 void
fd_hw_query_prepare_tile(struct fd_batch * batch,uint32_t n,struct fd_ringbuffer * ring)364 fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
365 		struct fd_ringbuffer *ring)
366 {
367 	uint32_t tile_stride = batch->query_tile_stride;
368 	uint32_t offset = tile_stride * n;
369 
370 	/* bail if no queries: */
371 	if (tile_stride == 0)
372 		return;
373 
374 	fd_wfi(batch, ring);
375 	OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
376 	OUT_RELOC(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
377 }
378 
379 void
fd_hw_query_set_stage(struct fd_batch * batch,enum fd_render_stage stage)380 fd_hw_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage)
381 {
382 	/* special case: internal blits (like mipmap level generation)
383 	 * go through normal draw path (via util_blitter_blit()).. but
384 	 * we need to ignore the FD_STAGE_DRAW which will be set, so we
385 	 * don't enable queries which should be paused during internal
386 	 * blits:
387 	 */
388 	if (batch->stage == FD_STAGE_BLIT && stage != FD_STAGE_NULL)
389 		stage = FD_STAGE_BLIT;
390 
391 	if (stage != batch->stage) {
392 		struct fd_hw_query *hq;
393 		LIST_FOR_EACH_ENTRY(hq, &batch->ctx->hw_active_queries, list) {
394 			bool was_active = is_active(hq, batch->stage);
395 			bool now_active = is_active(hq, stage);
396 
397 			if (now_active && !was_active)
398 				resume_query(batch, hq, batch->draw);
399 			else if (was_active && !now_active)
400 				pause_query(batch, hq, batch->draw);
401 		}
402 	}
403 	clear_sample_cache(batch);
404 }
405 
406 /* call the provider->enable() for all the hw queries that were active
407  * in the current batch.  This sets up perfctr selector regs statically
408  * for the duration of the batch.
409  */
410 void
fd_hw_query_enable(struct fd_batch * batch,struct fd_ringbuffer * ring)411 fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
412 {
413 	struct fd_context *ctx = batch->ctx;
414 	for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
415 		if (batch->active_providers & (1 << idx)) {
416 			assert(ctx->hw_sample_providers[idx]);
417 			if (ctx->hw_sample_providers[idx]->enable)
418 				ctx->hw_sample_providers[idx]->enable(ctx, ring);
419 		}
420 	}
421 	batch->active_providers = 0;  /* clear it for next frame */
422 }
423 
424 void
fd_hw_query_register_provider(struct pipe_context * pctx,const struct fd_hw_sample_provider * provider)425 fd_hw_query_register_provider(struct pipe_context *pctx,
426 		const struct fd_hw_sample_provider *provider)
427 {
428 	struct fd_context *ctx = fd_context(pctx);
429 	int idx = pidx(provider->query_type);
430 
431 	assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
432 	assert(!ctx->hw_sample_providers[idx]);
433 
434 	ctx->hw_sample_providers[idx] = provider;
435 }
436 
437 void
fd_hw_query_init(struct pipe_context * pctx)438 fd_hw_query_init(struct pipe_context *pctx)
439 {
440 	struct fd_context *ctx = fd_context(pctx);
441 
442 	slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
443 			16);
444 	slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
445 			16);
446 }
447 
448 void
fd_hw_query_fini(struct pipe_context * pctx)449 fd_hw_query_fini(struct pipe_context *pctx)
450 {
451 	struct fd_context *ctx = fd_context(pctx);
452 
453 	slab_destroy(&ctx->sample_pool);
454 	slab_destroy(&ctx->sample_period_pool);
455 }
456