1 /*
2  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26 
/* Shorthand for the GCC/Clang "packed" type attribute; it is applied to the
 * instruction-encoding structs and unions declared in this header.
 */
#define PACKED __attribute__((__packed__))
28 
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdbool.h>
32 #include <assert.h>
33 
/* Optional hook invoked by ir3_assert() when a check fails.
 *
 * The symbol is declared weak, so a program that does not define it still
 * links; ir3_assert() tests the symbol against NULL before calling it.  It
 * is also declared noreturn, so an implementation must not return.
 *
 *   expr - text of the expression that evaluated to false
 *   file - __FILE__ at the assertion site
 *   line - __LINE__ at the assertion site
 *   func - __func__ at the assertion site
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
		const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));

/* A wrapper for assert() that allows overriding handling of a failed
 * assert.  This is needed for tools like crashdec which can want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * 'expr' is evaluated exactly once.  On failure the handler (if one is
 * linked in) is called first; otherwise, we fall back to assert() with a
 * constant-false condition that still carries the expression text, so an
 * expression with side effects cannot fail the check and then pass the
 * fallback assert.
 */
#define ir3_assert(expr) do { \
		if (!(expr)) { \
			if (ir3_assert_handler) { \
				ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
			} \
			assert(0 && #expr); \
		} \
	} while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Combine an instruction category and a per-category opcode into a single
 * value: the opcode occupies the low NOPC_BITS bits and the category sits
 * above them.
 *
 * The category is scaled with a multiplication instead of '<<' because the
 * meta instructions use category -1 and left-shifting a negative value is
 * undefined behaviour in C.  Both arguments are parenthesised so that any
 * expression (e.g. a conditional expression) can be passed.  The result is
 * still an integer constant expression when both arguments are.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
54 
/* Opcode identifiers for all instruction categories.
 *
 * Every value is built with _OPC(category, opcode), so the category and the
 * per-category opcode can be recovered with opc_cat() and opc_op().  Gaps in
 * the numbering are marked "invalid" where the original author noted them.
 */
typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_B               = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	OPC_BKT             = _OPC(0, 16),
	OPC_STKS            = _OPC(0, 17),
	OPC_STKR            = _OPC(0, 18),
	OPC_XSET            = _OPC(0, 19),
	OPC_XCLR            = _OPC(0, 20),
	OPC_GETONE          = _OPC(0, 21),
	OPC_DBG             = _OPC(0, 22),
	OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
	OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

	OPC_PREDT           = _OPC(0, 29),   /* predicated true */
	OPC_PREDF           = _OPC(0, 30),   /* predicated false */
	OPC_PREDE           = _OPC(0, 31),   /* predicated end */

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
	OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* NOTE that these are 8+opc from their highp equivs, so it's possible
	 * that the high order bit in the opc field has been repurposed for
	 * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
	 * still use the same opc as highp
	 */
	OPC_HRSQ            = _OPC(4, 9),
	OPC_HLOG2           = _OPC(4, 10),
	OPC_HEXP2           = _OPC(4, 11),

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),
	/* cat5 meta instructions, placed above the cat5 opc field's size
	 * (the opc field of instr_cat5_t is 5 bits wide, so 32 and up cannot
	 * be encoded in it)
	 */
	OPC_DSXPP_MACRO     = _OPC(5, 32),
	OPC_DSYPP_MACRO     = _OPC(5, 33),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),        /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),        /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_LDIB            = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	/* 12-13 are not assigned here */
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	/* The "collect" and "split" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (split) like texture sample instructions, or read multiple
	 * consecutive scalar registers (collect) (bary.f, texture samp)
	 *
	 * A "split" extracts a scalar component from a vecN, and a
	 * "collect" gathers multiple scalar components into a vecN
	 */
	OPC_META_SPLIT      = _OPC(-1, 2),
	OPC_META_COLLECT    = _OPC(-1, 3),

	/* placeholder for texture fetches that run before FS invocation
	 * starts:
	 */
	OPC_META_TEX_PREFETCH = _OPC(-1, 4),

} opc_t;
264 
/* Split a value built with _OPC() back into its two parts:
 *   opc_cat() - everything above the low NOPC_BITS bits (the category), as int
 *   opc_op()  - the low NOPC_BITS bits (the per-category opcode), as unsigned
 *
 * NOTE(review): for the meta opcodes (category -1) opc_cat() right-shifts a
 * negative value, which is implementation-defined in C; presumably an
 * arithmetic shift (yielding -1) is relied upon -- confirm.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
267 
/* Declared here, defined elsewhere.  Presumably returns the mnemonic for
 * the given opcode -- confirm against the definition.
 */
const char *disasm_a3xx_instr_name(opc_t opc);
269 
/* Data type codes.  The values fit in 3 bits, matching the 3-bit type
 * fields of the instruction encodings below.  type_size(), type_float(),
 * type_uint() and type_sint() classify them.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;
280 
/* Return the width in bits (32, 16 or 8) of a value of the given type.
 *
 * Any value that is not one of the eight type_t enumerators trips
 * ir3_assert(); if that returns, the result is 0.
 */
static inline uint32_t type_size(type_t type)
{
	if (type == TYPE_F32 || type == TYPE_U32 || type == TYPE_S32)
		return 32;

	if (type == TYPE_F16 || type == TYPE_U16 || type == TYPE_S16)
		return 16;

	if (type == TYPE_U8 || type == TYPE_S8)
		return 8;

	ir3_assert(0); /* invalid type */
	return 0;
}
300 
/* Return non-zero (1) if type is one of the floating point types
 * (TYPE_F32, TYPE_F16), 0 otherwise.
 */
static inline int type_float(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_F16:
		return 1;
	default:
		return 0;
	}
}
305 
/* Return non-zero (1) if type is one of the unsigned integer types
 * (TYPE_U32, TYPE_U16, TYPE_U8), 0 otherwise.
 */
static inline int type_uint(type_t type)
{
	switch (type) {
	case TYPE_U32:
	case TYPE_U16:
	case TYPE_U8:
		return 1;
	default:
		return 0;
	}
}
310 
/* Return non-zero (1) if type is one of the signed integer types
 * (TYPE_S32, TYPE_S16, TYPE_S8), 0 otherwise.
 */
static inline int type_sint(type_t type)
{
	switch (type) {
	case TYPE_S32:
	case TYPE_S16:
	case TYPE_S8:
		return 1;
	default:
		return 0;
	}
}
315 
/* Overlay used to interpret a register/immediate source field.
 *
 * All members share the same storage; which one is meaningful depends on
 * the instruction field the value was taken from.  The dummy* members exist
 * only so that values of the various field widths can be assigned (see the
 * original "to make compiler happy" note).
 */
typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;   /* component, 0..3 */
		uint32_t num   : 10;  /* register number (compared against REG_A0/REG_P0 by reg_special()) */
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;  /* signed 11-bit immediate */
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	int32_t  idummy10  : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
	int32_t  idummy13  : 13;
	int32_t  idummy8   : 8;
} reg_t;
335 
336 /* comp:
337  *   0 - x
338  *   1 - y
339  *   2 - z
340  *   3 - w
341  */
/* Build a register id from a register number and a component index.
 *
 *   num  - register number, placed above the two component bits
 *   comp - component; only its low two bits are used (0..3)
 *
 * Returns (num << 2) | (comp & 3).
 */
static inline uint32_t regid(int num, int comp)
{
	const int component = comp & 0x3;   /* mask to the 2-bit component field */
	const int base = num << 2;          /* make room for the component bits */

	return base | component;
}
346 
/* INVALID_REG is the register id of r63.x, used as a "no register" marker.
 * VALIDREG(r) is true when r differs from that marker.
 * CONDREG(r, val) forwards to COND() with VALIDREG(r) as the condition;
 * COND is not defined in this header and must be provided by the includer.
 */
#define INVALID_REG      regid(63, 0)
#define VALIDREG(r)      ((r) != INVALID_REG)
#define CONDREG(r, val)  COND(VALIDREG(r), (val))
350 
/* special registers (register numbers, i.e. values of reg_t.num): */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */
354 
/* Return non-zero (1) if reg names one of the special registers, i.e. its
 * register number is REG_A0 or REG_P0; 0 otherwise.
 */
static inline int reg_special(reg_t reg)
{
	switch (reg.num) {
	case REG_A0:
	case REG_P0:
		return 1;
	default:
		return 0;
	}
}
359 
/* Branch types, stored in the 3-bit brtype field of instr_cat0_t.  The
 * trailing comments give the corresponding mnemonic.
 */
typedef enum {
	BRANCH_PLAIN = 0,   /* br */
	BRANCH_OR    = 1,   /* brao */
	BRANCH_AND   = 2,   /* braa */
	BRANCH_CONST = 3,   /* brac */
	BRANCH_ANY   = 4,   /* bany */
	BRANCH_ALL   = 5,   /* ball */
	BRANCH_X     = 6,   /* brax ??? */
} brtype_t;
369 
/* Encoding of a category 0 instruction: two 32-bit dwords.
 *
 * dword0 carries a signed immediate whose width depends on the generation
 * view used (16 bits for a3xx, 20 for a4xx, 32 for a5xx); the three views
 * overlay the same storage.  The dword1 fields add up to 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			int32_t immed     : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t idx      : 5;  /* brac.N index */
	uint32_t brtype   : 3;  /* branch type, see brtype_t */
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t inv1     : 1;
	uint32_t comp1    : 2;
	uint32_t eq       : 1;
	uint32_t opc_hi   : 1;  /* at least one bit; instr_opc() uses it as bit 4 of the opcode */
	uint32_t dummy4   : 2;
	uint32_t inv0     : 1;
	uint32_t comp0    : 2;  /* component for first src */
	uint32_t opc      : 4;  /* low four bits of the opcode */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;
404 
/* Encoding of a category 1 instruction: two 32-bit dwords.
 *
 * dword0 is a union of the possible source forms (plain register,
 * address-relative, or a 32-bit immediate viewed as signed, unsigned or
 * float).  The dword1 fields add up to 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like a address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;  /* 3-bit type code, presumably a type_t value */
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;  /* 3-bit type code, presumably a type_t value */
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;
447 
/* Encoding of a category 2 instruction: two 32-bit dwords.
 *
 * dword0 is made of two 16-bit unions, one per source.  Each union offers
 * three views of the same 16 bits: the default (anonymous) view with an
 * 11-bit source plus immediate/negate/abs flags, a relative view (rel1/rel2)
 * and a const view (c1/c2).  The dword1 fields add up to 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;   /* returned by instr_sat() */
	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;
511 
/* Encoding of a category 3 (three-source) instruction: two 32-bit dwords.
 *
 * dword0 is made of two 16-bit unions holding src1 and src3 (each with a
 * default, a relative and a const view); src2 is an 8-bit field in dword1.
 * Note that some flags for src2 (src2_c, src2_r, src2_neg) live in the
 * unused upper bits of the src1/src3 halves.  The dword1 fields add up to
 * 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;  /* doubles as nop1 if repeat==0 */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;   /* returned by instr_sat() */
	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;   /* per-category opcode; see instr_cat3_full() */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;
572 
/* Tell whether a category 3 instruction is a "full" one, judged from its
 * opcode alone.
 *
 * Returns false for the 16-bit variants (mad.f16/u16/s16, sel.b16/s16/f16,
 * sad.s16) and -- questionably, as the original author noted -- for
 * sad.s32; returns true for every other opcode.
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	const int opc = _OPC(3, cat3->opc);

	const bool is_half =
		opc == OPC_MAD_F16 ||
		opc == OPC_MAD_U16 ||
		opc == OPC_MAD_S16 ||
		opc == OPC_SEL_B16 ||
		opc == OPC_SEL_S16 ||
		opc == OPC_SEL_F16 ||
		opc == OPC_SAD_S16 ||
		opc == OPC_SAD_S32;  // really??

	return !is_half;
}
589 
/* Encoding of a category 4 (single-source) instruction: two 32-bit dwords.
 *
 * The low 16 bits of dword0 hold the source, with the same three views
 * (default / relative / const) used by the category 2 sources; the upper
 * 16 bits are marked as ignored.  The dword1 fields add up to 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;   /* returned by instr_sat() */
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;
630 
/* With is_s2en_bindless = 1 (see instr_cat5_t), this determines whether
 * bindless is enabled and, if so, how to get the (base, index) pair for both
 * sampler and texture.  There is a single base embedded in the instruction,
 * which is always used for the texture.
 */
/* Descriptor modes for category 5 instructions.  The values fit in 3 bits,
 * matching the desc_mode field of instr_cat5_t's s2en_bindless view.
 */
typedef enum {
	/* Use traditional GL binding model, get texture and sampler index
	 * from src3 which is not presumed to be uniform. This is
	 * backwards-compatible with earlier generations, where this field was
	 * always 0 and nonuniform-indexed sampling always worked.
	 */
	CAT5_NONUNIFORM = 0,

	/* The sampler base comes from the low 3 bits of a1.x, and the sampler
	 * and texture index come from src3 which is presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_UNIFORM = 1,

	/* The texture and sampler share the same base, and the sampler and
	 * texture index come from src3 which is *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_NONUNIFORM = 2,

	/* The sampler base comes from the low 3 bits of a1.x, and the sampler
	 * and texture index come from src3 which is *not* presumed to be
	 * uniform.
	 */
	CAT5_BINDLESS_A1_NONUNIFORM = 3,

	/* Use traditional GL binding model, get texture and sampler index
	 * from src3 which is presumed to be uniform.
	 */
	CAT5_UNIFORM = 4,

	/* The texture and sampler share the same base, and the sampler and
	 * texture index come from src3 which is presumed to be uniform.
	 */
	CAT5_BINDLESS_UNIFORM = 5,

	/* The texture and sampler share the same base, get sampler index from low
	 * 4 bits of src3 and texture index from high 4 bits.
	 */
	CAT5_BINDLESS_IMM = 6,

	/* The sampler base comes from the low 3 bits of a1.x, and the texture
	 * index comes from the next 8 bits of a1.x. The sampler index is an
	 * immediate in src3.
	 */
	CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
681 
/* Encoding of a category 5 instruction: two 32-bit dwords.
 *
 * dword0 has three overlapping 32-bit views.  All of them start with the
 * same full/src1/src2 fields (17 bits); they differ in how the remaining
 * 15 bits are split: "norm" holds immediate samp/tex indices, while
 * "s2en_bindless" holds base_hi, src3 and desc_mode.  The dword1 fields
 * add up to 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full         : 1;   /* not half */
			uint32_t src1         : 8;
			uint32_t src2         : 8;
			uint32_t dummy1       : 2;
			uint32_t base_hi      : 2;
			uint32_t src3         : 8;
			uint32_t desc_mode    : 3;   /* 3 bits, presumably a cat5_desc_mode_t value */
		} s2en_bindless;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t pad      : 15;
		};
	};

	/* dword1: */
	uint32_t dst              : 8;
	uint32_t wrmask           : 4;   /* write-mask */
	uint32_t type             : 3;
	uint32_t base_lo          : 1;   /* used with bindless */
	uint32_t is_3d            : 1;

	uint32_t is_a             : 1;
	uint32_t is_s             : 1;
	uint32_t is_s2en_bindless : 1;
	uint32_t is_o             : 1;
	uint32_t is_p             : 1;

	uint32_t opc              : 5;
	uint32_t jmp_tgt          : 1;
	uint32_t sync             : 1;
	uint32_t opc_cat          : 3;
} instr_cat5_t;
732 
733 /* dword0 encoding for src_off: [src1 + off], src2: */
/* Category 6 view whose dword0 carries a signed 13-bit offset next to src1.
 * The lowest bit is named mustbe1; it occupies the same position as
 * src_off in the generic view of instr_cat6_t.  dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
746 
747 /* dword0 encoding for !src_off: [src1], src2 */
/* Category 6 view whose dword0 has no source offset.  The lowest bit is
 * named mustbe0; it occupies the same position as src_off in the generic
 * view of instr_cat6_t.  dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t pad      : 5;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
761 
762 /* dword1 encoding for dst_off: */
/* Category 6 view whose dword1 carries a destination offset.  The offset is
 * split: 8 bits (off) in dword1 and a signed 5-bit part (off_high) in
 * dword0.  The mustbe1 bit occupies the same position as dst_off in the
 * generic view of instr_cat6_t.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dw0_pad1 : 9;
	int32_t off_high : 5;
	uint32_t dw0_pad2 : 18;

	/* dword1: */
	uint32_t off      : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
774 
775 /* dword1 encoding for !dst_off: */
/* Category 6 view whose dword1 has no destination offset.  The mustbe0 bit
 * occupies the same position as dst_off in the generic view of
 * instr_cat6_t.  dword0 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;
785 
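/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 *
 * NOTE(review): the struct below has no field named pad3.  It presumably
 * refers to the two bits now declared as 'g' (low bit) and 'src_ssbo_im',
 * which sit at the position instr_cat6stgb_t still calls pad3 -- confirm.
 */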
/* Category 6 view used for ldgb and the atomic instructions (see the
 * comment preceding this struct).  Both dwords add up to 32 bits each; the
 * top 10 bits of dword1 (pad4) overlay the opc/jmp_tgt/sync/opc_cat fields
 * of the generic instr_cat6_t view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t g        : 1;
	uint32_t src_ssbo_im : 1;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;
813 
814 /* stgb, pad0=0, pad3=2
815  */
/* Category 6 view used for stgb (see the comment preceding this struct).
 * Both dwords add up to 32 bits each; the top 10 bits of dword1 (pad4)
 * overlay the opc/jmp_tgt/sync/opc_cat fields of the generic instr_cat6_t
 * view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;  // ???
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t pad3     : 2;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;
835 
/* Overlay of all the "legacy" category 6 views on the same two dwords.
 *
 * The anonymous struct is the generic view: it exposes the fields shared by
 * the specific views -- src_off (bit 0 of dword0, the mustbe0/mustbe1 bit of
 * the a/b views), dst_off (the mustbe0/mustbe1 bit of the c/d views), and
 * the type/opc/jmp_tgt/sync/opc_cat fields.
 */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	instr_cat6ldgb_t ldgb;
	instr_cat6stgb_t stgb;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;  /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;
861 
862 /* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
863  */
/* Descriptor modes for the a6xx category 6 encoding.  The values fit in
 * 3 bits, matching the desc_mode field of instr_cat6_a6xx_t.  Value 3 is
 * not assigned here.
 */
typedef enum {
	/* Use old GL binding model with an immediate index. */
	CAT6_IMM = 0,

	/* NOTE(review): undocumented in the original; by analogy with the
	 * BINDLESS_* entries below, presumably the old binding model with a
	 * uniform register index -- confirm.
	 */
	CAT6_UNIFORM = 1,

	/* NOTE(review): undocumented in the original; presumably the old
	 * binding model with a register index that is not guaranteed to be
	 * uniform -- confirm.
	 */
	CAT6_NONUNIFORM = 2,

	/* Use the bindless model, with an immediate index.
	 */
	CAT6_BINDLESS_IMM = 4,

	/* Use the bindless model, with a uniform register index.
	 */
	CAT6_BINDLESS_UNIFORM = 5,

	/* Use the bindless model, with a register index that isn't guaranteed
	 * to be uniform. This presumably checks if the indices are equal and
	 * splits up the load/store, because it works the way you would
	 * expect.
	 */
	CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
887 
888 /**
889  * For atomic ops (which return a value):
890  *
891  *    pad1=1, pad3=c, pad5=3
892  *    src1    - vecN offset/coords
893  *    src2.x  - is actually dest register
894  *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
895  *              and src2.z is 'data'
896  *
897  * For stib (which does not return a value):
898  *    pad1=0, pad3=c, pad5=2
899  *    src1    - vecN offset/coords
900  *    src2    - value to store
901  *
902  * For ldib:
903  *    pad1=1, pad3=c, pad5=2
904  *    src1    - vecN offset/coords
905  *
906  * for ldc (load from UBO using descriptor):
907  *    pad1=0, pad3=8, pad5=2
908  *
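 * pad2 and pad5 are only observed to be 0.
 * NOTE(review): that contradicts the non-zero pad5 values listed above (and
 * is_cat6_legacy() tests pad5 & 0x2); pad4 was probably meant -- confirm.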
910  */
/* The a6xx ("new") category 6 encoding: two 32-bit dwords, each adding up
 * to 32 bits.  Unlike the legacy views, the opcode lives in dword0.
 * is_cat6_legacy() inspects pad3 and pad5 to tell this encoding apart from
 * the legacy one.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 1;
	uint32_t base     : 3;
	uint32_t pad2     : 2;
	uint32_t desc_mode : 3;  /* 3 bits, presumably a cat6_desc_mode_t value */
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t opc      : 5;
	uint32_t pad3     : 5;
	uint32_t src1     : 8;  /* coordinate/offset */

	/* dword1: */
	uint32_t src2     : 8;  /* or the dst for load instructions */
	uint32_t pad4     : 1;  //mustbe0 ??
	uint32_t ssbo     : 8;  /* ssbo/image binding point */
	uint32_t type     : 3;
	uint32_t pad5     : 7;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6_a6xx_t;
934 
/* Encoding of a category 7 instruction: two 32-bit dwords.  dword0 is
 * entirely padding in this view; the dword1 fields add up to 32 bits.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;
952 
/* One instruction, viewed through any of the per-category encodings.
 *
 * All members overlay the same two dwords.  The anonymous struct exposes
 * the fields that sit at the same position across categories; in
 * particular opc_cat (top 3 bits of dword1) is what instr_repeat(),
 * instr_sat() and instr_opc() switch on to pick the matching catN view.
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	instr_cat6_a6xx_t cat6_a6xx;
	instr_cat7_t cat7;
	struct PACKED {
		/* dword0: */
		uint32_t pad1     : 32;

		/* dword1: */
		uint32_t pad2     : 12;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;
978 
/* Return the value of the instruction's 'repeat' field.
 *
 * Only categories 0-4 have such a field in their encoding; for any other
 * category the result is 0.
 */
static inline uint32_t instr_repeat(instr_t *instr)
{
	uint32_t rpt = 0;

	switch (instr->opc_cat) {
	case 0:
		rpt = instr->cat0.repeat;
		break;
	case 1:
		rpt = instr->cat1.repeat;
		break;
	case 2:
		rpt = instr->cat2.repeat;
		break;
	case 3:
		rpt = instr->cat3.repeat;
		break;
	case 4:
		rpt = instr->cat4.repeat;
		break;
	default:
		/* no repeat field in this category */
		break;
	}

	return rpt;
}
990 
/* Return the instruction's 'sat' bit.
 *
 * Only categories 2, 3 and 4 have such a bit in their encoding; for any
 * other category the result is false.
 */
static inline bool instr_sat(instr_t *instr)
{
	bool saturate = false;

	switch (instr->opc_cat) {
	case 2:
		saturate = instr->cat2.sat;
		break;
	case 3:
		saturate = instr->cat3.sat;
		break;
	case 4:
		saturate = instr->cat4.sat;
		break;
	default:
		/* no sat bit in this category */
		break;
	}

	return saturate;
}
1000 
1001 /* We can probably drop the gpu_id arg, but keeping it for now so we can
1002  * assert if we see something we think should be new encoding on an older
1003  * gpu.
1004  */
/* Decide whether a category 6 instruction uses the legacy encoding
 * (instr_cat6_t) rather than the a6xx one (instr_cat6_a6xx_t).
 *
 *   instr  - instruction to inspect
 *   gpu_id - only used for a sanity check: the new encoding is asserted to
 *            appear on gpu_id >= 600 only
 *
 * Returns true for the legacy encoding, false for the a6xx encoding.
 */
static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
{
	instr_cat6_a6xx_t *a6xx = &instr->cat6_a6xx;

	/* At least one of the two tested bits is padding in every possible
	 * "legacy" cat6 encoding, and in all the pre-a6xx cmdstream traces
	 * analysed by the original author that pad bit is zero.  Both bits
	 * being set therefore identifies the new encoding.
	 */
	const bool new_encoding = (a6xx->pad3 & 0x8) && (a6xx->pad5 & 0x2);

	if (!new_encoding)
		return true;

	ir3_assert(gpu_id >= 600);
	ir3_assert(instr->cat6.opc == 0);
	return false;
}
1022 
/* Extract the per-category opcode from an instruction.
 *
 *   instr  - instruction to decode
 *   gpu_id - forwarded to is_cat6_legacy() for category 6 instructions
 *
 * Category 0 combines the 4-bit opc field with opc_hi as bit 4.  Category 1
 * always yields 0.  Category 6 reads the opcode from the a6xx view when the
 * instruction is not in the legacy encoding, and from the legacy view
 * otherwise.
 */
static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
{
	uint32_t opc = 0;

	switch (instr->opc_cat) {
	case 0:
		opc = instr->cat0.opc | (instr->cat0.opc_hi << 4);
		break;
	case 2:
		opc = instr->cat2.opc;
		break;
	case 3:
		opc = instr->cat3.opc;
		break;
	case 4:
		opc = instr->cat4.opc;
		break;
	case 5:
		opc = instr->cat5.opc;
		break;
	case 6:
		if (is_cat6_legacy(instr, gpu_id))
			opc = instr->cat6.opc;
		else
			opc = instr->cat6_a6xx.opc;
		break;
	case 7:
		opc = instr->cat7.opc;
		break;
	case 1:
	default:
		/* category 1 (and anything unexpected) reports opcode 0 */
		break;
	}

	return opc;
}
1040 
/* Return true if opc is one of the OPC_MAD_* opcodes
 * (u16, s16, u24, s24, f16, f32).  The OPC_MADSH_* opcodes are not included;
 * see is_madsh().
 */
static inline bool is_mad(opc_t opc)
{
	return opc == OPC_MAD_U16 ||
	       opc == OPC_MAD_S16 ||
	       opc == OPC_MAD_U24 ||
	       opc == OPC_MAD_S24 ||
	       opc == OPC_MAD_F16 ||
	       opc == OPC_MAD_F32;
}
1055 
/* Return true if opc is OPC_MADSH_U16 or OPC_MADSH_M16. */
static inline bool is_madsh(opc_t opc)
{
	return opc == OPC_MADSH_U16 || opc == OPC_MADSH_M16;
}
1066 
/* Return true if opc is one of the OPC_ATOMIC_* opcodes. */
static inline bool is_atomic(opc_t opc)
{
	bool atomic = false;

	switch (opc) {
	case OPC_ATOMIC_ADD:
	case OPC_ATOMIC_SUB:
	case OPC_ATOMIC_XCHG:
	case OPC_ATOMIC_INC:
	case OPC_ATOMIC_DEC:
	case OPC_ATOMIC_CMPXCHG:
	case OPC_ATOMIC_MIN:
	case OPC_ATOMIC_MAX:
	case OPC_ATOMIC_AND:
	case OPC_ATOMIC_OR:
	case OPC_ATOMIC_XOR:
		atomic = true;
		break;
	default:
		break;
	}

	return atomic;
}
1086 
/* Return true if opc is one of OPC_RESFMT, OPC_RESINFO, OPC_LDGB, OPC_STGB
 * or OPC_STIB; false for every other opcode.
 */
static inline bool is_ssbo(opc_t opc)
{
	return opc == OPC_RESFMT ||
	       opc == OPC_RESINFO ||
	       opc == OPC_LDGB ||
	       opc == OPC_STGB ||
	       opc == OPC_STIB;
}
1100 
/* Return true if opc is OPC_ISAM, OPC_ISAML or OPC_ISAMM. */
static inline bool is_isam(opc_t opc)
{
	return opc == OPC_ISAM || opc == OPC_ISAML || opc == OPC_ISAMM;
}
1112 
1113 
/* Return true if opc is one of the category 2 opcodes whose name carries
 * the _F suffix, from OPC_ADD_F up to OPC_TRUNC_F.  OPC_BARY_F is not in the
 * list and yields false.
 */
static inline bool is_cat2_float(opc_t opc)
{
	bool is_float = false;

	switch (opc) {
	case OPC_ADD_F:
	case OPC_MIN_F:
	case OPC_MAX_F:
	case OPC_MUL_F:
	case OPC_SIGN_F:
	case OPC_CMPS_F:
	case OPC_ABSNEG_F:
	case OPC_CMPV_F:
	case OPC_FLOOR_F:
	case OPC_CEIL_F:
	case OPC_RNDNE_F:
	case OPC_RNDAZ_F:
	case OPC_TRUNC_F:
		is_float = true;
		break;
	default:
		break;
	}

	return is_float;
}
1136 
/* Return true if opc is one of the floating point category 3 opcodes:
 * OPC_MAD_F16, OPC_MAD_F32, OPC_SEL_F16 or OPC_SEL_F32.
 */
static inline bool is_cat3_float(opc_t opc)
{
	return opc == OPC_MAD_F16 ||
	       opc == OPC_MAD_F32 ||
	       opc == OPC_SEL_F16 ||
	       opc == OPC_SEL_F32;
}
1149 
1150 #endif /* INSTR_A3XX_H_ */
1151