1 /*
2  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26 
/* Shorthand for the GCC-style "packed" attribute.  It is applied to every
 * register/instruction encoding struct and union declared in this header.
 */
#define PACKED __attribute__((__packed__))
28 
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
31 
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack an instruction category and a per-category opcode into a single
 * value: the category is placed above the low NOPC_BITS bits and the
 * opcode occupies the low NOPC_BITS bits.
 *
 * The category is scaled by multiplication instead of being shifted,
 * because the meta opcodes use category -1 and left-shifting a negative
 * value is undefined behavior in C.  For non-negative categories the
 * result is identical to a shift.  The opcode argument is parenthesized
 * so that compound expressions passed as 'opc' expand as intended.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
36 
/* Opcode enumeration.
 *
 * Every enumerator is built with _OPC(cat, opc): the instruction category
 * is stored above the low NOPC_BITS bits and the per-category opcode in
 * the low NOPC_BITS bits.  opc_cat()/opc_op() split a value back into
 * those two parts.  The meta opcodes at the end use category -1, so their
 * enumerator values are negative.
 */
typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_BR              = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U           = _OPC(2, 48),
	OPC_MUL_S           = _OPC(2, 49),
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),        /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),        /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_STI             = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	/* 12-13 - no enumerator defined here */
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	OPC_META_PHI        = _OPC(-1, 1),
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO         = _OPC(-1, 2),
	OPC_META_FI         = _OPC(-1, 3),

} opc_t;
215 
/* Split an opc_t value back into the two parts packed by _OPC():
 *   opc_cat() - the category, i.e. the bits above NOPC_BITS, as an int
 *   opc_op()  - the per-category opcode, i.e. the low NOPC_BITS bits
 *
 * NOTE(review): the meta opcodes are negative; right-shifting a negative
 * value is implementation-defined in C, so opc_cat() yields -1 for them
 * only where the compiler implements an arithmetic shift.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
218 
/* Data type codes (values 0-7).  The helpers below classify them:
 * type_size() gives the width in bits named by the suffix, and
 * type_float()/type_uint()/type_sint() test for the F/U/S families.
 * Presumably these are the values stored in the 3-bit type fields of the
 * instruction encodings (src_type, dst_type, type) - confirm.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;
229 
/* Returns the width in bits (32, 16 or 8) of the given data type.
 * Any value that is not one of the type_t enumerators trips the assert
 * and, when asserts are compiled out, yields 0.
 */
static inline uint32_t type_size(type_t type)
{
	if (type == TYPE_F32 || type == TYPE_U32 || type == TYPE_S32)
		return 32;

	if (type == TYPE_F16 || type == TYPE_U16 || type == TYPE_S16)
		return 16;

	if (type == TYPE_U8 || type == TYPE_S8)
		return 8;

	assert(0); /* invalid type */
	return 0;
}
249 
/* Returns 1 when the type is one of the float types (TYPE_F16 or
 * TYPE_F32), otherwise 0.
 */
static inline int type_float(type_t type)
{
	switch (type) {
	case TYPE_F16:
	case TYPE_F32:
		return 1;
	default:
		return 0;
	}
}
254 
/* Returns 1 when the type is one of the unsigned integer types
 * (TYPE_U8, TYPE_U16 or TYPE_U32), otherwise 0.
 */
static inline int type_uint(type_t type)
{
	switch (type) {
	case TYPE_U8:
	case TYPE_U16:
	case TYPE_U32:
		return 1;
	default:
		return 0;
	}
}
259 
/* Returns 1 when the type is one of the signed integer types
 * (TYPE_S8, TYPE_S16 or TYPE_S32), otherwise 0.
 */
static inline int type_sint(type_t type)
{
	switch (type) {
	case TYPE_S8:
	case TYPE_S16:
	case TYPE_S32:
		return 1;
	default:
		return 0;
	}
}
264 
/* Register operand value.  All members overlay the same 32 bits of
 * storage (the widest member is dummy32); which view is meaningful
 * depends on how the operand is being used.
 */
typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;    /* presumably the component selector - confirm */
		uint32_t num   : 10;   /* register number, see REG_A0/REG_P0 */
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;   /* signed 11-bit immediate */
	/* to make compiler happy: */
	/* (fixed-width views of the same bits, 8 to 32 bits wide) */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	int32_t  idummy10  : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
} reg_t;
282 
/* special registers: */
/* (these are values of reg_t.num; reg_special() tests for them) */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */
286 
/* Returns 1 when the register number refers to one of the special
 * registers (the address register REG_A0 or the predicate register
 * REG_P0), otherwise 0.
 */
static inline int reg_special(reg_t reg)
{
	switch (reg.num) {
	case REG_A0:
	case REG_P0:
		return 1;
	default:
		return 0;
	}
}
291 
/* Category 0 instruction encoding: two 32-bit words.
 *
 * dword0 carries an immediate whose width differs per GPU generation
 * (16 bits in the a3xx view, 20 bits in the a4xx view, 32 bits in the
 * a5xx view); the three views overlay the same word.  dword1 carries the
 * flags and the 4-bit opcode, and ends with the jmp_tgt/sync/opc_cat
 * fields in the same positions as in the other instruction categories.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			uint32_t immed    : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;
321 
/* Category 1 instruction encoding: two 32-bit words.
 *
 * dword0 is the source operand, viewed either as a register, as an
 * address-relative offset, or as a full 32-bit immediate (signed,
 * unsigned or float).  dword1 holds the destination, the src/dst type
 * codes and flags.  There is no opc field in this category; the only
 * category 1 opcode in opc_t is OPC_MOV.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like an address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;
364 
/* Category 2 instruction encoding: two 32-bit words.
 *
 * dword0 is split into two 16-bit halves, one per source operand (src1
 * then src2).  Each half has three overlaid views: the plain view with
 * immediate/negate/abs flags, a relative-addressing view (rel1/rel2) and
 * a const view (c1/c2).  dword1 holds the destination, flags and the
 * 6-bit opcode.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;
427 
/* Category 3 instruction encoding (three source operands): two 32-bit
 * words.
 *
 * dword0 is split into two 16-bit halves: the first describes src1 and
 * the second src3, each with plain, relative (rel1/rel2) and const
 * (c1/c2) views.  Note that several src2 flags (src2_c, src2_r,
 * src2_neg) live in the spare bits of those halves, while the 8-bit src2
 * register itself is in dword1 next to the 4-bit opcode.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;
487 
/* Classifies a category 3 instruction by its opcode: returns false for
 * the 16-bit mad/sel/sad opcodes and for sad.s32, and true for every
 * other category 3 opcode.  (Presumably "full" means "not half", as for
 * the 'full' bit of the other categories - confirm.)
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	const int opc = _OPC(3, cat3->opc);

	if (opc == OPC_MAD_F16 || opc == OPC_MAD_U16 || opc == OPC_MAD_S16)
		return false;

	if (opc == OPC_SEL_B16 || opc == OPC_SEL_S16 || opc == OPC_SEL_F16)
		return false;

	if (opc == OPC_SAD_S16)
		return false;

	if (opc == OPC_SAD_S32)  /* really?? */
		return false;

	return true;
}
504 
/* Category 4 instruction encoding (single source operand): two 32-bit
 * words.
 *
 * The low 16 bits of dword0 describe the source with plain, relative
 * (rel) and const (c) views; the upper 16 bits are dummy1.  dword1 holds
 * the destination, flags and the 6-bit opcode.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;
544 
/* Category 5 instruction encoding: two 32-bit words.
 *
 * dword0 has two alternative layouts, 'norm' and 's2en' (presumably
 * selected by the is_s2en bit in dword1 - confirm), plus an anonymous
 * view exposing the 'full' and 'src1' fields that sit at the same
 * position in both.  dword1 holds the destination, write-mask, type
 * code, the is_* flag bits and the 5-bit opcode.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;
593 
/* dword0 encoding for src_off: [src1 + off], src2:
 *
 * Bit 0 (mustbe1) overlays the src_off bit of instr_cat6_t.  dword1 is
 * kept opaque here; it is described by instr_cat6c_t/instr_cat6d_t.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;   /* signed offset added to src1 */
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
607 
/* dword0 encoding for !src_off: [src1], src2
 *
 * Bit 0 (mustbe0) overlays the src_off bit of instr_cat6_t.  dword1 is
 * kept opaque here; it is described by instr_cat6c_t/instr_cat6d_t.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
621 
/* dword1 encoding for dst_off:
 *
 * The mustbe1 bit (bit 8 of dword1) overlays the dst_off bit of
 * instr_cat6_t.  dword0 is kept opaque here; it is described by
 * instr_cat6a_t/instr_cat6b_t.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t off       : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
638 
/* dword1 encoding for !dst_off:
 *
 * The mustbe0 bit (bit 8 of dword1) overlays the dst_off bit of
 * instr_cat6_t.  dword0 is kept opaque here; it is described by
 * instr_cat6a_t/instr_cat6b_t.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;
649 
/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 *
 * Both words are fully described here.  The top 10 bits of dword1
 * (pad4) overlay the opc/jmp_tgt/sync/opc_cat fields of instr_cat6_t,
 * and pad2 overlays its 3-bit type field.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t g        : 1;
	uint32_t pad3     : 1;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;
677 
/* stgb, pad0=0, pad3=2
 *
 * Both words are fully described here.  The top 10 bits of dword1
 * (pad4) overlay the opc/jmp_tgt/sync/opc_cat fields of instr_cat6_t,
 * and pad2 overlays its 3-bit type field.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;  // ???
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t pad3     : 2;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;
699 
/* Category 6 instruction: a union of the per-form encodings.
 *
 * 'a'/'b' describe dword0 (with/without a source offset), 'c'/'d'
 * describe dword1 (with/without a destination offset), and 'ldgb'/'stgb'
 * describe both words for those instruction forms.  The anonymous struct
 * exposes the fields shared by all forms: the src_off and dst_off
 * selector bits, the type code, and the 5-bit opcode followed by
 * jmp_tgt/sync/opc_cat.
 */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	instr_cat6ldgb_t ldgb;
	instr_cat6stgb_t stgb;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;  /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;
725 
/* Category 7 instruction encoding: two 32-bit words.
 *
 * dword0 is entirely padding.  dword1 carries the ss flag, the w/r/l/g
 * flag bits and a 4-bit opcode (the category 7 opcodes in opc_t are
 * OPC_BAR and OPC_FENCE).
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;
743 
/* A complete 64-bit instruction, viewable as any of the per-category
 * encodings (cat0..cat7).
 *
 * The anonymous struct exposes the fields that sit at the same bit
 * position in several categories: pad1 spans all of dword0 plus the low
 * 8 bits of dword1, and opc_cat (the top 3 bits) selects which catN view
 * applies - see instr_opc().
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	instr_cat7_t cat7;
	struct PACKED {
		/* dword0: */
		uint64_t pad1     : 40;
		uint32_t repeat   : 3;  /* cat0-cat4 */
		uint32_t pad2     : 1;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;
767 
/* Returns the raw per-category opcode field of an instruction, picking
 * the catN view that matches instr->opc_cat.  Category 1 has no opc
 * field in its encoding, so 0 is returned for it.
 */
static inline uint32_t instr_opc(instr_t *instr)
{
	uint32_t opc = 0;

	switch (instr->opc_cat) {
	case 0:
		opc = instr->cat0.opc;
		break;
	case 2:
		opc = instr->cat2.opc;
		break;
	case 3:
		opc = instr->cat3.opc;
		break;
	case 4:
		opc = instr->cat4.opc;
		break;
	case 5:
		opc = instr->cat5.opc;
		break;
	case 6:
		opc = instr->cat6.opc;
		break;
	case 7:
		opc = instr->cat7.opc;
		break;
	case 1:
	default:
		/* no opc field: keep 0 */
		break;
	}

	return opc;
}
782 
/* Returns true when opc is one of the OPC_MAD_* opcodes (u16, s16, u24,
 * s24, f16 or f32); the OPC_MADSH_* opcodes are not included.
 */
static inline bool is_mad(opc_t opc)
{
	return opc == OPC_MAD_U16 ||
	       opc == OPC_MAD_S16 ||
	       opc == OPC_MAD_U24 ||
	       opc == OPC_MAD_S24 ||
	       opc == OPC_MAD_F16 ||
	       opc == OPC_MAD_F32;
}
797 
/* Returns true when opc is OPC_MADSH_U16 or OPC_MADSH_M16. */
static inline bool is_madsh(opc_t opc)
{
	return opc == OPC_MADSH_U16 || opc == OPC_MADSH_M16;
}
808 
/* Returns true when opc is one of the OPC_ATOMIC_* opcodes. */
static inline bool is_atomic(opc_t opc)
{
	return opc == OPC_ATOMIC_ADD ||
	       opc == OPC_ATOMIC_SUB ||
	       opc == OPC_ATOMIC_XCHG ||
	       opc == OPC_ATOMIC_INC ||
	       opc == OPC_ATOMIC_DEC ||
	       opc == OPC_ATOMIC_CMPXCHG ||
	       opc == OPC_ATOMIC_MIN ||
	       opc == OPC_ATOMIC_MAX ||
	       opc == OPC_ATOMIC_AND ||
	       opc == OPC_ATOMIC_OR ||
	       opc == OPC_ATOMIC_XOR;
}
828 
/* Returns true when opc is one of OPC_RESFMT, OPC_RESINFO, OPC_LDGB,
 * OPC_STGB or OPC_STIB.
 */
static inline bool is_ssbo(opc_t opc)
{
	return opc == OPC_RESFMT ||
	       opc == OPC_RESINFO ||
	       opc == OPC_LDGB ||
	       opc == OPC_STGB ||
	       opc == OPC_STIB;
}
842 
843 #endif /* INSTR_A3XX_H_ */
844