/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/list.h"

#include "freedreno_util.h"

#ifdef DEBUG
#  define BATCH_DEBUG (fd_mesa_debug & FD_DBG_MSGS)
#else
#  define BATCH_DEBUG 0
#endif

struct fd_context;
struct fd_resource;
enum fd_resource_status;

/* Bitmask of the rendering stages during which a particular query is
 * active.  Queries will be automatically started/stopped (generating
 * additional fd_hw_sample_period's) on entrance/exit from stages that
 * are applicable to the query.
 *
 * NOTE: set the stage to NULL at the end of an IB to ensure no query is
 * still active.  Things aren't going to work out the way you want if a
 * query is active across IB's (or between the tile IB and draw IB).
 */
enum fd_render_stage {
	FD_STAGE_NULL     = 0x00,
	FD_STAGE_DRAW     = 0x01,
	FD_STAGE_CLEAR    = 0x02,
	/* used for driver internal draws (ie. util_blitter_blit()): */
	FD_STAGE_BLIT     = 0x04,
	FD_STAGE_ALL      = 0xff,
};

#define MAX_HW_SAMPLE_PROVIDERS 7
struct fd_hw_sample_provider;
struct fd_hw_sample;

/* A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
	struct pipe_reference reference;
	unsigned seqno;
	unsigned idx;       /* index into cache->batches[] */

	int in_fence_fd;
	bool needs_out_fence_fd;
	struct pipe_fence_handle *fence;

	struct fd_context *ctx;

	/* Do we need to mem2gmem before rendering?  We don't, if for example,
	 * there was a glClear() that invalidated the entire previous buffer
	 * contents.  Keep track of which buffer(s) are cleared, or need
	 * restore.  Masks of PIPE_CLEAR_*
	 *
	 * The 'cleared' bits will be set for buffers which are *entirely*
	 * cleared; 'fast_cleared' is the subset of those handled via the
	 * hw fast-clear path.
	 *
	 * The 'invalidated' bits are set for cleared buffers, and buffers
	 * where the contents are undefined, ie. what we don't need to restore
	 * to gmem.
	 */
	enum {
		/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
		FD_BUFFER_COLOR   = PIPE_CLEAR_COLOR,
		FD_BUFFER_DEPTH   = PIPE_CLEAR_DEPTH,
		FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
		FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
	} invalidated, cleared, fast_cleared, restore, resolve;
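	/* For example (illustrative only): a glClear() covering the entire
	 * color buffer would set FD_BUFFER_COLOR in both 'cleared' and
	 * 'invalidated', so no mem2gmem restore of the color contents is
	 * needed for this batch.
	 */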

	/* is this a non-draw batch (ie compute/blit which has no pfb state)? */
	bool nondraw : 1;
	bool needs_flush : 1;
	bool flushed : 1;
	bool blit : 1;
	bool back_blit : 1;      /* only blit so far is resource shadowing back-blit */
	bool tessellation : 1;      /* tessellation used in batch */

	/* Keep track if WAIT_FOR_IDLE is needed for registers we need
	 * to update via RMW:
	 */
	bool needs_wfi : 1;

	/* To decide whether to render to system memory, keep track of the
	 * number of draws, and whether any of them require multisample,
	 * depth_test (or depth write), stencil_test, blending, or
	 * color_logic_op (since those functions are disabled when
	 * bypassing GMEM).
	 */
	enum {
		FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01,
		FD_GMEM_DEPTH_ENABLED        = 0x02,
		FD_GMEM_STENCIL_ENABLED      = 0x04,

		FD_GMEM_BLEND_ENABLED        = 0x10,
		FD_GMEM_LOGICOP_ENABLED      = 0x20,
		FD_GMEM_FB_READ              = 0x40,
	} gmem_reason;

	/* At submit time, once we've decided that this batch will use GMEM
	 * rendering, the appropriate gmem state is looked up:
	 */
	const struct fd_gmem_stateobj *gmem_state;

	unsigned num_draws;      /* number of draws in current batch */
	unsigned num_vertices;   /* number of vertices in current batch */

	/* Currently only used on a6xx, to calculate vsc prim/draw stream
	 * sizes:
	 */
	unsigned num_bins_per_pipe;
	unsigned prim_strm_bits;
	unsigned draw_strm_bits;

	/* Track the maximal bounds of the scissor of all the draws within a
	 * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
	 * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
	 */
	struct pipe_scissor_state max_scissor;

	/* Keep track of DRAW initiators that need to be patched up depending
	 * on whether or not we are using binning:
	 */
	struct util_dynarray draw_patches;

	/* texture state that needs patching for fb_read: */
	struct util_dynarray fb_read_patches;

	/* Keep track of writes to RB_RENDER_CONTROL which need to be patched
	 * once we know whether or not to use GMEM, and GMEM tile pitch.
	 *
	 * (only for a3xx.. but having gen specific subclasses of fd_batch
	 * seemed overkill for now)
	 */
	struct util_dynarray rbrc_patches;

	/* Keep track of GMEM related values that need to be patched up once we
	 * know the gmem layout:
	 */
	struct util_dynarray gmem_patches;

	/* Keep track of pointers to the start of MEM exports for a20x binning
	 * shaders.
	 *
	 * This is so the end of the shader can be cut off at the right point,
	 * depending on the GMEM configuration.
	 */
	struct util_dynarray shader_patches;

	struct pipe_framebuffer_state framebuffer;

	struct fd_submit *submit;

	/** draw pass cmdstream: */
	struct fd_ringbuffer *draw;
	/** binning pass cmdstream: */
	struct fd_ringbuffer *binning;
	/** tiling/gmem (IB0) cmdstream: */
	struct fd_ringbuffer *gmem;

	/** preamble cmdstream (executed once before first tile): */
	struct fd_ringbuffer *prologue;

	/** epilogue cmdstream (executed after each tile): */
	struct fd_ringbuffer *epilogue;

	struct fd_ringbuffer *tile_setup;
	struct fd_ringbuffer *tile_fini;

	union pipe_color_union clear_color[MAX_RENDER_TARGETS];
	double clear_depth;
	unsigned clear_stencil;

	/**
	 * hw query related state:
	 */
	/*@{*/
	/* next sample offset.. incremented for each sample in the batch/
	 * submit, reset to zero on next submit.
	 */
	uint32_t next_sample_offset;

	/* cached samples (in case multiple queries need to reference
	 * the same sample snapshot)
	 */
	struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

	/* which sample providers were active in the current batch: */
	uint32_t active_providers;

	/* tracking for current stage, to know when to start/stop
	 * any active queries:
	 */
	enum fd_render_stage stage;

	/* list of samples in current batch: */
	struct util_dynarray samples;

	/* current query result bo and tile stride: */
	struct pipe_resource *query_buf;
	uint32_t query_tile_stride;
	/*@}*/


	/* Set of resources used by currently-unsubmitted batch (read or
	 * write).. does not hold a reference to the resource.
	 */
	struct set *resources;

	/** key in batch-cache (if not null): */
	const void *key;
	uint32_t hash;

	/** set of dependent batches.. holds refs to dependent batches: */
	uint32_t dependents_mask;

	/* Buffer for tessellation engine input
	 */
	struct fd_bo *tessfactor_bo;
	uint32_t tessfactor_size;

	/* Buffer for passing parameters between TCS and TES
	 */
	struct fd_bo *tessparam_bo;
	uint32_t tessparam_size;

	struct fd_ringbuffer *tess_addrs_constobj;

	struct list_head log_chunks;  /* list of unflushed log chunks in fifo order */
};

struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);

void fd_batch_reset(struct fd_batch *batch);
void fd_batch_flush(struct fd_batch *batch);
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep);
void fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc);
void fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc);
void fd_batch_check_size(struct fd_batch *batch);

/* not called directly: */
void __fd_batch_describe(char* buf, const struct fd_batch *batch);
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch..  so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock.  Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources)
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version in case the batch being ref'd can disappear under
 * you.
 */
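
/* For example, a typical caller (hypothetical sketch; 'ctx->batch' is
 * assumed to be the context's current batch) might look like:
 *
 *    struct fd_batch *batch = NULL;
 *
 *    fd_batch_reference(&batch, ctx->batch);   // takes the lock, grabs a ref
 *    ...
 *    fd_batch_reference(&batch, NULL);         // takes the lock, drops the ref
 */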

/* fwd-decl prototypes to untangle header dependency :-/ */
static inline void fd_context_assert_locked(struct fd_context *ctx);
static inline void fd_context_lock(struct fd_context *ctx);
static inline void fd_context_unlock(struct fd_context *ctx);

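/* Replace the batch reference in *ptr with 'batch', taking a ref on the
 * new batch and dropping the ref on the old one (destroying it if that
 * was the last reference).  The caller must already hold screen->lock,
 * see the locking notes above:
 */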
static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
	struct fd_batch *old_batch = *ptr;

	/* only need lock if a reference is dropped: */
	if (old_batch)
		fd_context_assert_locked(old_batch->ctx);

	if (pipe_reference_described(&(*ptr)->reference, &batch->reference,
			(debug_reference_descriptor)__fd_batch_describe))
		__fd_batch_destroy(old_batch);

	*ptr = batch;
}

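/* Unlocked variant: takes the context lock (when there is an existing
 * reference that may be dropped) around fd_batch_reference_locked():
 */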
static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
	struct fd_batch *old_batch = *ptr;
	struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;

	if (ctx)
		fd_context_lock(ctx);

	fd_batch_reference_locked(ptr, batch);

	if (ctx)
		fd_context_unlock(ctx);
}

#include "freedreno_context.h"

static inline void
fd_reset_wfi(struct fd_batch *batch)
{
	batch->needs_wfi = true;
}

void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring);

/* emit a CP_EVENT_WRITE:
 */
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
		enum vgt_event_type evt)
{
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, evt);
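	/* after queuing more work, a fresh WAIT_FOR_IDLE will be needed before
	 * the next RMW register update (see needs_wfi / fd_wfi()):
	 */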
	fd_reset_wfi(batch);
}

/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
	if (batch->epilogue == NULL)
		batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);

	return batch->epilogue;
}

struct fd_ringbuffer * fd_batch_get_prologue(struct fd_batch *batch);

#endif /* FREEDRENO_BATCH_H_ */