/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
23 #ifndef R600_ASM_H
24 #define R600_ASM_H
25 
26 #include "r600_pipe.h"
27 #include "r600_isa.h"
28 #include "tgsi/tgsi_exec.h"
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
/*
 * One source operand of an ALU instruction (struct r600_bytecode_alu holds
 * three of these).
 *
 * NOTE(review): field meanings are not spelled out in this header; the names
 * suggest selector/channel, negate/absolute modifiers, relative addressing
 * and constant-cache bank information - confirm against the assembler code.
 */
struct r600_bytecode_alu_src {
	unsigned			sel;
	unsigned			chan;
	unsigned			neg;
	unsigned			abs;
	unsigned			rel;
	unsigned			kc_bank;
	unsigned			kc_rel;
	uint32_t			value;	/* the only fixed-width (32-bit) member */
};

/*
 * Destination operand of an ALU instruction (embedded in
 * struct r600_bytecode_alu as `dst`).
 *
 * NOTE(review): field meanings are inferred from their names only
 * (selector, channel, clamp flag, write enable, relative addressing) -
 * confirm against the assembler code.
 */
struct r600_bytecode_alu_dst {
	unsigned			sel;
	unsigned			chan;
	unsigned			clamp;
	unsigned			write;
	unsigned			rel;
};

53 struct r600_bytecode_alu {
54 	struct list_head		list;
55 	struct r600_bytecode_alu_src		src[3];
56 	struct r600_bytecode_alu_dst		dst;
57 	unsigned			op;
58 	unsigned			last;
59 	unsigned			is_op3;
60 	unsigned			is_lds_idx_op;
61 	unsigned			execute_mask;
62 	unsigned			update_pred;
63 	unsigned			pred_sel;
64 	unsigned			bank_swizzle;
65 	unsigned			bank_swizzle_force;
66 	unsigned			omod;
67 	unsigned                        index_mode;
68 	unsigned                        lds_idx;
69 };
70 
71 struct r600_bytecode_tex {
72 	struct list_head		list;
73 	unsigned			op;
74 	unsigned			inst_mod;
75 	unsigned			resource_id;
76 	unsigned			src_gpr;
77 	unsigned			src_rel;
78 	unsigned			dst_gpr;
79 	unsigned			dst_rel;
80 	unsigned			dst_sel_x;
81 	unsigned			dst_sel_y;
82 	unsigned			dst_sel_z;
83 	unsigned			dst_sel_w;
84 	unsigned			lod_bias;
85 	unsigned			coord_type_x;
86 	unsigned			coord_type_y;
87 	unsigned			coord_type_z;
88 	unsigned			coord_type_w;
89 	int				offset_x;
90 	int				offset_y;
91 	int				offset_z;
92 	unsigned			sampler_id;
93 	unsigned			src_sel_x;
94 	unsigned			src_sel_y;
95 	unsigned			src_sel_z;
96 	unsigned			src_sel_w;
97 	/* indexed samplers/resources only on evergreen/cayman */
98 	unsigned			sampler_index_mode;
99 	unsigned			resource_index_mode;
100 };
101 
102 struct r600_bytecode_vtx {
103 	struct list_head		list;
104 	unsigned			op;
105 	unsigned			fetch_type;
106 	unsigned			buffer_id;
107 	unsigned			src_gpr;
108 	unsigned			src_sel_x;
109 	unsigned			mega_fetch_count;
110 	unsigned			dst_gpr;
111 	unsigned			dst_sel_x;
112 	unsigned			dst_sel_y;
113 	unsigned			dst_sel_z;
114 	unsigned			dst_sel_w;
115 	unsigned			use_const_fields;
116 	unsigned			data_format;
117 	unsigned			num_format_all;
118 	unsigned			format_comp_all;
119 	unsigned			srf_mode_all;
120 	unsigned			offset;
121 	unsigned			endian;
122 	unsigned			buffer_index_mode;
123 
124 	// READ_SCRATCH fields
125 	unsigned			uncached;
126 	unsigned			indexed;
127 	unsigned			src_sel_y;
128 	unsigned			src_rel;
129 	unsigned			elem_size;
130 	unsigned			array_size;
131 	unsigned			array_base;
132 	unsigned			burst_count;
133 	unsigned			dst_rel;
134 };
135 
136 struct r600_bytecode_gds {
137 	struct list_head		list;
138 	unsigned			op;
139 	unsigned			src_gpr;
140 	unsigned			src_rel;
141 	unsigned			src_sel_x;
142 	unsigned			src_sel_y;
143 	unsigned			src_sel_z;
144 	unsigned			src_gpr2;
145 	unsigned			dst_gpr;
146 	unsigned			dst_rel;
147 	unsigned			dst_sel_x;
148 	unsigned			dst_sel_y;
149 	unsigned			dst_sel_z;
150 	unsigned			dst_sel_w;
151 	unsigned			uav_index_mode;
152 	unsigned                        uav_id;
153 	unsigned                        alloc_consume;
154 	unsigned                        bcast_first_req;
155 };
156 
/*
 * Description of an output (export/memory write) operation.  It is embedded
 * in struct r600_bytecode_cf as `output`, queued in
 * struct r600_bytecode::pending_outputs, and passed to
 * r600_bytecode_add_output() / r600_bytecode_add_pending_output().
 */
struct r600_bytecode_output {
	unsigned			array_base;
	unsigned			array_size;
	unsigned			comp_mask;
	unsigned			type;

	unsigned			op;

	unsigned			elem_size;
	unsigned			gpr;
	unsigned			swizzle_x;
	unsigned			swizzle_y;
	unsigned			swizzle_z;
	unsigned			swizzle_w;
	unsigned			burst_count;
	unsigned			index_gpr;
	unsigned			mark; /* used by MEM_SCRATCH */
};

/*
 * RAT-related attributes of a control-flow instruction (embedded in
 * struct r600_bytecode_cf as `rat`).
 *
 * NOTE(review): the field names suggest a RAT id, an instruction code and
 * an index mode - confirm against the assembler code.
 */
struct r600_bytecode_rat {
	unsigned			id;
	unsigned			inst;
	unsigned			index_mode;
};

/*
 * One kcache entry; struct r600_bytecode_cf carries an array of four.
 *
 * NOTE(review): presumably describes a constant-cache line (bank, lock mode,
 * address, index mode) - confirm against the assembler code.
 */
struct r600_bytecode_kcache {
	unsigned			bank;
	unsigned			mode;
	unsigned			addr;
	unsigned			index_mode;
};

189 struct r600_bytecode_cf {
190 	struct list_head		list;
191 
192 	unsigned			op;
193 	unsigned			addr;
194 	unsigned			ndw;
195 	unsigned			id;
196 	unsigned			cond;
197 	unsigned			pop_count;
198 	unsigned			count;
199 	unsigned			cf_addr; /* control flow addr */
200 	struct r600_bytecode_kcache		kcache[4];
201 	unsigned			r6xx_uses_waterfall;
202 	unsigned			eg_alu_extended;
203 	unsigned			barrier;
204 	unsigned			end_of_program;
205 	unsigned                        mark;
206 	unsigned                        vpm;
207 	struct list_head		alu;
208 	struct list_head		tex;
209 	struct list_head		vtx;
210 	struct list_head		gds;
211 	struct r600_bytecode_output		output;
212 	struct r600_bytecode_rat		rat;
213 	struct r600_bytecode_alu		*curr_bs_head;
214 	struct r600_bytecode_alu		*prev_bs_head;
215 	struct r600_bytecode_alu		*prev2_bs_head;
216 	unsigned isa[2];
217 	unsigned nlds_read;
218 	unsigned nqueue_read;
219 };
220 
/* Values for r600_cf_stack_entry::type (flow-control stack entry kinds). */
#define FC_NONE				0
#define FC_IF				1
#define FC_LOOP				2
#define FC_REP				3
#define FC_PUSH_VPM			4
#define FC_PUSH_WQM			5

/*
 * One entry of the flow-control stack kept in struct r600_bytecode
 * (`fc_stack`, indexed by `fc_sp`).
 */
struct r600_cf_stack_entry {
	int				type;	/* NOTE(review): presumably one of the FC_* values */
	struct r600_bytecode_cf		*start;
	struct r600_bytecode_cf		**mid; /* used to store the else point */
	int				num_mid;	/* NOTE(review): presumably the number of entries in `mid` */
};

#define SQ_MAX_CALL_DEPTH 0x00000020

/* Values for r600_bytecode::ar_handling (NOTE(review): presumed use). */
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */

/*
 * Bookkeeping for the stack usage of a shader: current nesting levels of
 * the different PUSH kinds and loops, and the resulting size requirements.
 * Embedded in struct r600_bytecode as `stack`.
 */
struct r600_stack_info {
	/* current level of non-WQM PUSH operations
	 * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE) */
	int push;
	/* current level of WQM PUSH operations
	 * (PUSH, PUSH_ELSE, PUSH_WQM) */
	int push_wqm;
	/* current loop level */
	int loop;

	/* required depth */
	int max_entries;
	/* subentries per entry */
	int entry_size;
};

256 struct r600_bytecode {
257 	enum chip_class			chip_class;
258 	enum radeon_family		family;
259 	bool				has_compressed_msaa_texturing;
260 	int				type;
261 	struct list_head		cf;
262 	struct r600_bytecode_cf		*cf_last;
263 	unsigned			ndw;
264 	unsigned			ncf;
265 	unsigned			ngpr;
266 	unsigned			nstack;
267 	unsigned			nlds_dw;
268 	unsigned			nresource;
269 	unsigned			force_add_cf;
270 	uint32_t			*bytecode;
271 	uint32_t			fc_sp;
272 	struct r600_cf_stack_entry	fc_stack[TGSI_EXEC_MAX_NESTING];
273 	struct r600_stack_info		stack;
274 	unsigned	ar_loaded;
275 	unsigned	ar_reg;
276 	unsigned	ar_chan;
277 	unsigned        ar_handling;
278 	unsigned        r6xx_nop_after_rel_dst;
279 	bool            index_loaded[2];
280 	unsigned        index_reg[2]; /* indexing register CF_INDEX_[01] */
281 	unsigned        index_reg_chan[2]; /* indexing register chanel CF_INDEX_[01] */
282 	unsigned        debug_id;
283 	struct r600_isa* isa;
284 	struct r600_bytecode_output pending_outputs[5];
285 	int n_pending_outputs;
286 	boolean			need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
287 	boolean			precise;
288 };
289 
/*
 * eg_asm.c
 *
 * NOTE(review): the definitions are not visible from this header; all four
 * functions return int, presumably 0 on success and an error code otherwise.
 */
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_clause);
int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gds, unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
			  struct r600_bytecode_alu *alu, unsigned id);
296 /* r600_asm.c */
297 void r600_bytecode_init(struct r600_bytecode *bc,
298 			enum chip_class chip_class,
299 			enum radeon_family family,
300 			bool has_compressed_msaa_texturing);
301 void r600_bytecode_clear(struct r600_bytecode *bc);
302 int r600_bytecode_add_alu(struct r600_bytecode *bc,
303 		const struct r600_bytecode_alu *alu);
304 int r600_bytecode_add_vtx(struct r600_bytecode *bc,
305 		const struct r600_bytecode_vtx *vtx);
306 int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc,
307 			     const struct r600_bytecode_vtx *vtx);
308 int r600_bytecode_add_tex(struct r600_bytecode *bc,
309 		const struct r600_bytecode_tex *tex);
310 int r600_bytecode_add_gds(struct r600_bytecode *bc,
311 		const struct r600_bytecode_gds *gds);
312 int r600_bytecode_add_output(struct r600_bytecode *bc,
313 		const struct r600_bytecode_output *output);
314 int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
315 		const struct r600_bytecode_output *output);
316 void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean needed);
317 boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc);
318 int r600_bytecode_build(struct r600_bytecode *bc);
319 int r600_bytecode_add_cf(struct r600_bytecode *bc);
320 int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
321 		unsigned op);
322 int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
323 		const struct r600_bytecode_alu *alu, unsigned type);
324 void r600_bytecode_special_constants(uint32_t value,
325 		unsigned *sel, unsigned *neg, unsigned abs);
326 void r600_bytecode_disasm(struct r600_bytecode *bc);
327 void r600_bytecode_alu_read(struct r600_bytecode *bc,
328 		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
329 
330 int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
331 
332 void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
333 				      unsigned count,
334 				      const struct pipe_vertex_element *elements);
335 
/* r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
		const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc,
		struct r600_bytecode_vtx *mem, unsigned id);

/*
 * NOTE(review): the *_read() functions take two 32-bit words and a
 * non-const instruction pointer; presumably they decode the words into it.
 */
void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);

/* `format`, `num_format`, `format_comp` and `endian` are out-parameters. */
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
			   unsigned *num_format, unsigned *format_comp, unsigned *endian);

/*
 * Swap an index with its pair partner: 0 <-> 1 and 2 <-> 3.
 *
 * @param i  index to map
 * @return   the partner index for i in 0..3; 0 for any other value
 *
 * NOTE(review): the name suggests this pairs the two 32-bit channels that
 * make up a 64-bit floating-point value - confirm against the callers.
 */
static inline int fp64_switch(int i)
{
	switch (i) {
	case 0:
		return 1;
	case 1:
		return 0;
	case 2:
		return 3;
	case 3:
		return 2;
	default:
		/* out-of-range input falls back to 0 */
		return 0;
	}
}

369 #ifdef __cplusplus
370 }
371 #endif
372 
373 #endif
374