/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
2 
3 /*
4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Rob Clark <robclark@freedesktop.org>
27  */
28 
29 #include "pipe/p_state.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 
33 #include "freedreno_query_hw.h"
34 #include "freedreno_context.h"
35 #include "freedreno_resource.h"
36 #include "freedreno_util.h"
37 
38 struct fd_hw_sample_period {
39 	struct fd_hw_sample *start, *end;
40 	struct list_head list;
41 };
42 
43 static struct fd_hw_sample *
get_sample(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned query_type)44 get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
45 		unsigned query_type)
46 {
47 	struct fd_context *ctx = batch->ctx;
48 	struct fd_hw_sample *samp = NULL;
49 	int idx = pidx(query_type);
50 
51 	assume(idx >= 0);   /* query never would have been created otherwise */
52 
53 	if (!batch->sample_cache[idx]) {
54 		struct fd_hw_sample *new_samp =
55 			ctx->hw_sample_providers[idx]->get_sample(batch, ring);
56 		fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
57 		util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
58 		batch->needs_flush = true;
59 	}
60 
61 	fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
62 
63 	return samp;
64 }
65 
66 static void
clear_sample_cache(struct fd_batch * batch)67 clear_sample_cache(struct fd_batch *batch)
68 {
69 	int i;
70 
71 	for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
72 		fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
73 }
74 
75 static bool
is_active(struct fd_hw_query * hq,enum fd_render_stage stage)76 is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
77 {
78 	return !!(hq->provider->active & stage);
79 }
80 
81 
82 static void
resume_query(struct fd_batch * batch,struct fd_hw_query * hq,struct fd_ringbuffer * ring)83 resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
84 		struct fd_ringbuffer *ring)
85 {
86 	int idx = pidx(hq->provider->query_type);
87 	DBG("%p", hq);
88 	assert(idx >= 0);   /* query never would have been created otherwise */
89 	assert(!hq->period);
90 	batch->active_providers |= (1 << idx);
91 	hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
92 	list_inithead(&hq->period->list);
93 	hq->period->start = get_sample(batch, ring, hq->base.type);
94 	/* NOTE: slab_alloc_st() does not zero out the buffer: */
95 	hq->period->end = NULL;
96 }
97 
98 static void
pause_query(struct fd_batch * batch,struct fd_hw_query * hq,struct fd_ringbuffer * ring)99 pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
100 		struct fd_ringbuffer *ring)
101 {
102 	int idx = pidx(hq->provider->query_type);
103 	DBG("%p", hq);
104 	assert(idx >= 0);   /* query never would have been created otherwise */
105 	assert(hq->period && !hq->period->end);
106 	assert(batch->active_providers & (1 << idx));
107 	hq->period->end = get_sample(batch, ring, hq->base.type);
108 	list_addtail(&hq->period->list, &hq->periods);
109 	hq->period = NULL;
110 }
111 
112 static void
destroy_periods(struct fd_context * ctx,struct fd_hw_query * hq)113 destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
114 {
115 	struct fd_hw_sample_period *period, *s;
116 	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
117 		fd_hw_sample_reference(ctx, &period->start, NULL);
118 		fd_hw_sample_reference(ctx, &period->end, NULL);
119 		list_del(&period->list);
120 		slab_free_st(&ctx->sample_period_pool, period);
121 	}
122 }
123 
124 static void
fd_hw_destroy_query(struct fd_context * ctx,struct fd_query * q)125 fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
126 {
127 	struct fd_hw_query *hq = fd_hw_query(q);
128 
129 	DBG("%p: active=%d", q, q->active);
130 
131 	destroy_periods(ctx, hq);
132 	list_del(&hq->list);
133 
134 	free(hq);
135 }
136 
137 static boolean
fd_hw_begin_query(struct fd_context * ctx,struct fd_query * q)138 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
139 {
140 	struct fd_batch *batch = ctx->batch;
141 	struct fd_hw_query *hq = fd_hw_query(q);
142 
143 	DBG("%p: active=%d", q, q->active);
144 
145 	/* begin_query() should clear previous results: */
146 	destroy_periods(ctx, hq);
147 
148 	if (batch && is_active(hq, batch->stage))
149 		resume_query(batch, hq, batch->draw);
150 
151 	/* add to active list: */
152 	assert(list_empty(&hq->list));
153 	list_addtail(&hq->list, &ctx->hw_active_queries);
154 
155 	return true;
156 }
157 
158 static void
fd_hw_end_query(struct fd_context * ctx,struct fd_query * q)159 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
160 {
161 	struct fd_batch *batch = ctx->batch;
162 	struct fd_hw_query *hq = fd_hw_query(q);
163 
164 	DBG("%p: active=%d", q, q->active);
165 
166 	if (batch && is_active(hq, batch->stage))
167 		pause_query(batch, hq, batch->draw);
168 
169 	/* remove from active list: */
170 	list_delinit(&hq->list);
171 }
172 
173 /* helper to get ptr to specified sample: */
sampptr(struct fd_hw_sample * samp,uint32_t n,void * ptr)174 static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
175 {
176 	return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
177 }
178 
179 static boolean
fd_hw_get_query_result(struct fd_context * ctx,struct fd_query * q,boolean wait,union pipe_query_result * result)180 fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
181 		boolean wait, union pipe_query_result *result)
182 {
183 	struct fd_hw_query *hq = fd_hw_query(q);
184 	const struct fd_hw_sample_provider *p = hq->provider;
185 	struct fd_hw_sample_period *period;
186 
187 	DBG("%p: wait=%d, active=%d", q, wait, q->active);
188 
189 	if (LIST_IS_EMPTY(&hq->periods))
190 		return true;
191 
192 	assert(LIST_IS_EMPTY(&hq->list));
193 	assert(!hq->period);
194 
195 	/* if !wait, then check the last sample (the one most likely to
196 	 * not be ready yet) and bail if it is not ready:
197 	 */
198 	if (!wait) {
199 		int ret;
200 
201 		period = LIST_ENTRY(struct fd_hw_sample_period,
202 				hq->periods.prev, list);
203 
204 		struct fd_resource *rsc = fd_resource(period->end->prsc);
205 
206 		if (pending(rsc, false)) {
207 			/* piglit spec@arb_occlusion_query@occlusion_query_conform
208 			 * test, and silly apps perhaps, get stuck in a loop trying
209 			 * to get  query result forever with wait==false..  we don't
210 			 * wait to flush unnecessarily but we also don't want to
211 			 * spin forever:
212 			 */
213 			if (hq->no_wait_cnt++ > 5)
214 				fd_batch_flush(rsc->write_batch, false, false);
215 			return false;
216 		}
217 
218 		if (!rsc->bo)
219 			return false;
220 
221 		ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe,
222 				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
223 		if (ret)
224 			return false;
225 
226 		fd_bo_cpu_fini(rsc->bo);
227 	}
228 
229 	/* sum the result across all sample periods: */
230 	LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
231 		struct fd_hw_sample *start = period->start;
232 		struct fd_hw_sample *end = period->end;
233 		unsigned i;
234 
235 		/* start and end samples should be from same batch: */
236 		assert(start->prsc == end->prsc);
237 		assert(start->num_tiles == end->num_tiles);
238 
239 		struct fd_resource *rsc = fd_resource(start->prsc);
240 
241 		if (rsc->write_batch)
242 			fd_batch_flush(rsc->write_batch, true, false);
243 
244 		/* some piglit tests at least do query with no draws, I guess: */
245 		if (!rsc->bo)
246 			continue;
247 
248 		fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
249 
250 		void *ptr = fd_bo_map(rsc->bo);
251 
252 		for (i = 0; i < start->num_tiles; i++) {
253 			p->accumulate_result(ctx, sampptr(period->start, i, ptr),
254 					sampptr(period->end, i, ptr), result);
255 		}
256 
257 		fd_bo_cpu_fini(rsc->bo);
258 	}
259 
260 	return true;
261 }
262 
263 static const struct fd_query_funcs hw_query_funcs = {
264 		.destroy_query    = fd_hw_destroy_query,
265 		.begin_query      = fd_hw_begin_query,
266 		.end_query        = fd_hw_end_query,
267 		.get_query_result = fd_hw_get_query_result,
268 };
269 
270 struct fd_query *
fd_hw_create_query(struct fd_context * ctx,unsigned query_type)271 fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
272 {
273 	struct fd_hw_query *hq;
274 	struct fd_query *q;
275 	int idx = pidx(query_type);
276 
277 	if ((idx < 0) || !ctx->hw_sample_providers[idx])
278 		return NULL;
279 
280 	hq = CALLOC_STRUCT(fd_hw_query);
281 	if (!hq)
282 		return NULL;
283 
284 	DBG("%p: query_type=%u", hq, query_type);
285 
286 	hq->provider = ctx->hw_sample_providers[idx];
287 
288 	list_inithead(&hq->periods);
289 	list_inithead(&hq->list);
290 
291 	q = &hq->base;
292 	q->funcs = &hw_query_funcs;
293 	q->type = query_type;
294 
295 	return q;
296 }
297 
298 struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch * batch,uint32_t size)299 fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
300 {
301 	struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
302 	pipe_reference_init(&samp->reference, 1);
303 	samp->size = size;
304 	debug_assert(util_is_power_of_two(size));
305 	batch->next_sample_offset = align(batch->next_sample_offset, size);
306 	samp->offset = batch->next_sample_offset;
307 	/* NOTE: slab_alloc_st() does not zero out the buffer: */
308 	samp->prsc = NULL;
309 	samp->num_tiles = 0;
310 	samp->tile_stride = 0;
311 	batch->next_sample_offset += size;
312 
313 	if (!batch->query_buf) {
314 		struct pipe_screen *pscreen = &batch->ctx->screen->base;
315 		struct pipe_resource templ = {
316 			.target  = PIPE_BUFFER,
317 			.format  = PIPE_FORMAT_R8_UNORM,
318 			.bind    = PIPE_BIND_QUERY_BUFFER,
319 			.width0  = 0,    /* create initially zero size buffer */
320 			.height0 = 1,
321 			.depth0  = 1,
322 			.array_size = 1,
323 			.last_level = 0,
324 			.nr_samples = 1,
325 		};
326 		batch->query_buf = pscreen->resource_create(pscreen, &templ);
327 	}
328 
329 	pipe_resource_reference(&samp->prsc, batch->query_buf);
330 
331 	return samp;
332 }
333 
334 void
__fd_hw_sample_destroy(struct fd_context * ctx,struct fd_hw_sample * samp)335 __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
336 {
337 	pipe_resource_reference(&samp->prsc, NULL);
338 	slab_free_st(&ctx->sample_pool, samp);
339 }
340 
341 /* called from gmem code once total storage requirements are known (ie.
342  * number of samples times number of tiles)
343  */
344 void
fd_hw_query_prepare(struct fd_batch * batch,uint32_t num_tiles)345 fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
346 {
347 	uint32_t tile_stride = batch->next_sample_offset;
348 
349 	if (tile_stride > 0)
350 		fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
351 
352 	batch->query_tile_stride = tile_stride;
353 
354 	while (batch->samples.size > 0) {
355 		struct fd_hw_sample *samp =
356 			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
357 		samp->num_tiles = num_tiles;
358 		samp->tile_stride = tile_stride;
359 		fd_hw_sample_reference(batch->ctx, &samp, NULL);
360 	}
361 
362 	/* reset things for next batch: */
363 	batch->next_sample_offset = 0;
364 }
365 
366 void
fd_hw_query_prepare_tile(struct fd_batch * batch,uint32_t n,struct fd_ringbuffer * ring)367 fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
368 		struct fd_ringbuffer *ring)
369 {
370 	uint32_t tile_stride = batch->query_tile_stride;
371 	uint32_t offset = tile_stride * n;
372 
373 	/* bail if no queries: */
374 	if (tile_stride == 0)
375 		return;
376 
377 	fd_wfi(batch, ring);
378 	OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
379 	OUT_RELOCW(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
380 }
381 
382 void
fd_hw_query_set_stage(struct fd_batch * batch,enum fd_render_stage stage)383 fd_hw_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage)
384 {
385 	if (stage != batch->stage) {
386 		struct fd_hw_query *hq;
387 		LIST_FOR_EACH_ENTRY(hq, &batch->ctx->hw_active_queries, list) {
388 			bool was_active = is_active(hq, batch->stage);
389 			bool now_active = is_active(hq, stage);
390 
391 			if (now_active && !was_active)
392 				resume_query(batch, hq, batch->draw);
393 			else if (was_active && !now_active)
394 				pause_query(batch, hq, batch->draw);
395 		}
396 	}
397 	clear_sample_cache(batch);
398 }
399 
400 /* call the provider->enable() for all the hw queries that were active
401  * in the current batch.  This sets up perfctr selector regs statically
402  * for the duration of the batch.
403  */
404 void
fd_hw_query_enable(struct fd_batch * batch,struct fd_ringbuffer * ring)405 fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
406 {
407 	struct fd_context *ctx = batch->ctx;
408 	for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
409 		if (batch->active_providers & (1 << idx)) {
410 			assert(ctx->hw_sample_providers[idx]);
411 			if (ctx->hw_sample_providers[idx]->enable)
412 				ctx->hw_sample_providers[idx]->enable(ctx, ring);
413 		}
414 	}
415 	batch->active_providers = 0;  /* clear it for next frame */
416 }
417 
418 void
fd_hw_query_register_provider(struct pipe_context * pctx,const struct fd_hw_sample_provider * provider)419 fd_hw_query_register_provider(struct pipe_context *pctx,
420 		const struct fd_hw_sample_provider *provider)
421 {
422 	struct fd_context *ctx = fd_context(pctx);
423 	int idx = pidx(provider->query_type);
424 
425 	assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
426 	assert(!ctx->hw_sample_providers[idx]);
427 
428 	ctx->hw_sample_providers[idx] = provider;
429 }
430 
431 void
fd_hw_query_init(struct pipe_context * pctx)432 fd_hw_query_init(struct pipe_context *pctx)
433 {
434 	struct fd_context *ctx = fd_context(pctx);
435 
436 	slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
437 			16);
438 	slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
439 			16);
440 }
441 
442 void
fd_hw_query_fini(struct pipe_context * pctx)443 fd_hw_query_fini(struct pipe_context *pctx)
444 {
445 	struct fd_context *ctx = fd_context(pctx);
446 
447 	slab_destroy(&ctx->sample_pool);
448 	slab_destroy(&ctx->sample_period_pool);
449 }
450