1 /*
2  * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #ifndef FD5_EMIT_H
28 #define FD5_EMIT_H
29 
30 #include "pipe/p_context.h"
31 
32 #include "freedreno_context.h"
33 #include "fd5_context.h"
34 #include "fd5_format.h"
35 #include "fd5_program.h"
36 #include "fd5_screen.h"
37 #include "ir3_gallium.h"
38 
39 struct fd_ringbuffer;
40 
41 /* grouped together emit-state for prog/vertex/state emit: */
42 struct fd5_emit {
43 	struct pipe_debug_callback *debug;
44 	const struct fd_vertex_state *vtx;
45 	const struct fd_program_stateobj *prog;
46 	const struct pipe_draw_info *info;
47 	bool binning_pass;
48 	struct ir3_shader_key key;
49 	enum fd_dirty_3d_state dirty;
50 
51 	uint32_t sprite_coord_enable;  /* bitmask */
52 	bool sprite_coord_mode;
53 	bool rasterflat;
54 	bool no_decode_srgb;
55 
56 	/* in binning pass, we don't have real frag shader, so we
57 	 * don't know if real draw disqualifies lrz write.  So just
58 	 * figure that out up-front and stash it in the emit.
59 	 */
60 	bool no_lrz_write;
61 
62 	/* cached to avoid repeated lookups of same variants: */
63 	const struct ir3_shader_variant *vs, *fs;
64 	/* TODO: other shader stages.. */
65 
66 	unsigned streamout_mask;
67 };
68 
fd5_emit_format(struct pipe_surface * surf)69 static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf)
70 {
71 	if (!surf)
72 		return 0;
73 	return fd5_pipe2color(surf->format);
74 }
75 
76 static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit * emit)77 fd5_emit_get_vp(struct fd5_emit *emit)
78 {
79 	if (!emit->vs) {
80 		struct ir3_shader *shader = emit->prog->vs;
81 		emit->vs = ir3_shader_variant(shader, emit->key,
82 				emit->binning_pass, emit->debug);
83 	}
84 	return emit->vs;
85 }
86 
87 static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit * emit)88 fd5_emit_get_fp(struct fd5_emit *emit)
89 {
90 	if (!emit->fs) {
91 		if (emit->binning_pass) {
92 			/* use dummy stateobj to simplify binning vs non-binning: */
93 			static const struct ir3_shader_variant binning_fs = {};
94 			emit->fs = &binning_fs;
95 		} else {
96 			struct ir3_shader *shader = emit->prog->fs;
97 			emit->fs = ir3_shader_variant(shader, emit->key,
98 					false, emit->debug);
99 		}
100 	}
101 	return emit->fs;
102 }
103 
104 static inline void
fd5_cache_flush(struct fd_batch * batch,struct fd_ringbuffer * ring)105 fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
106 {
107 	fd_reset_wfi(batch);
108 	OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
109 	OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MIN_LO */
110 	OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MIN_HI */
111 	OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MAX_LO */
112 	OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MAX_HI */
113 	OUT_RING(ring, 0x00000012);   /* UCHE_CACHE_INVALIDATE */
114 	fd_wfi(batch, ring);
115 }
116 
117 static inline void
fd5_set_render_mode(struct fd_context * ctx,struct fd_ringbuffer * ring,enum render_mode_cmd mode)118 fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
119 		enum render_mode_cmd mode)
120 {
121 	/* TODO add preemption support, gmem bypass, etc */
122 	emit_marker5(ring, 7);
123 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
124 	OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
125 	OUT_RING(ring, 0x00000000);   /* ADDR_LO */
126 	OUT_RING(ring, 0x00000000);   /* ADDR_HI */
127 	OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
128 			COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
129 	OUT_RING(ring, 0x00000000);
130 	emit_marker5(ring, 7);
131 }
132 
133 static inline void
fd5_emit_blit(struct fd_context * ctx,struct fd_ringbuffer * ring)134 fd5_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring)
135 {
136 	struct fd5_context *fd5_ctx = fd5_context(ctx);
137 
138 	emit_marker5(ring, 7);
139 
140 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
141 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(BLIT));
142 	OUT_RELOC(ring, fd5_ctx->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
143 	OUT_RING(ring, 0x00000000);
144 
145 	emit_marker5(ring, 7);
146 }
147 
148 static inline void
fd5_emit_render_cntl(struct fd_context * ctx,bool blit,bool binning)149 fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning)
150 {
151 	struct fd_ringbuffer *ring = binning ? ctx->batch->binning : ctx->batch->draw;
152 
153 	/* TODO eventually this partially depends on the pfb state, ie.
154 	 * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
155 	 * we could probably cache and just regenerate if framebuffer
156 	 * state is dirty (or something like that)..
157 	 *
158 	 * Other bits seem to depend on query state, like if samples-passed
159 	 * query is active.
160 	 */
161 	bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
162 	OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
163 	OUT_RING(ring, 0x00000000 |   /* RB_RENDER_CNTL */
164 			COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
165 			COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
166 			COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
167 			COND(!blit, 0x8));
168 
169 	OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
170 	OUT_RING(ring, 0x00000008 |   /* GRAS_SC_CNTL */
171 			COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
172 			COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
173 }
174 
175 static inline void
fd5_emit_lrz_flush(struct fd_ringbuffer * ring)176 fd5_emit_lrz_flush(struct fd_ringbuffer *ring)
177 {
178 	/* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
179 	 * a workaround and not needed on all a5xx.
180 	 */
181 	OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
182 	OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);
183 
184 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
185 	OUT_RING(ring, LRZ_FLUSH);
186 
187 	OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
188 	OUT_RING(ring, 0x0);
189 }
190 
/*
 * Out-of-line emit entry points.  Only the declarations live in this
 * header; the notes below are inferred from names and signatures and
 * should be confirmed against the implementations.
 */

/* NOTE(review): presumably emits vertex-buffer state described by `emit`
 * into `ring` — confirm.
 */
void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit);

/* NOTE(review): presumably emits the 3d state selected by `emit` into
 * `ring` — confirm.
 */
void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
		struct fd5_emit *emit);

/* Compute-shader counterparts; `cp` / `v` is the compute variant.
 * NOTE(review): exact state covered is not visible here — confirm.
 */
void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
		struct ir3_shader_variant *cp);
void fd5_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
		struct fd_context *ctx, const struct pipe_grid_info *info);

/* NOTE(review): presumably re-emits baseline hw state for `batch` —
 * confirm.
 */
void fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);

/* Init hooks for the screen and the context.
 * NOTE(review): what they install is not visible here — confirm.
 */
void fd5_emit_init_screen(struct pipe_screen *pscreen);
void fd5_emit_init(struct pipe_context *pctx);
205 
/*
 * Emit an indirect-buffer call from `ring` into `target`, with a
 * scratch6 marker written before and after it.
 *
 * The marker is a debugging aid for lockups: a unique counter value is
 * written to scratch6 for each IB, which makes it easier to match
 * register dumps to cmdstream.  Together with the DRAW marker
 * (scratch7), that is enough to "triangulate" the particular draw that
 * caused the lockup.
 */
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
	const int ib_scratch_idx = 6;

	emit_marker5(ring, ib_scratch_idx);
	__OUT_IB5(ring, target);
	emit_marker5(ring, ib_scratch_idx);
}
219 
220 #endif /* FD5_EMIT_H */
221