1 /*
2  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26 
/* Shorthand for the GCC/Clang "packed" attribute.  It is applied to every
 * instruction-encoding struct/union in this header so that the compiler
 * inserts no padding between members.
 */
#define PACKED __attribute__((__packed__))
28 
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
31 
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Build an opcode identifier from an instruction category and the opcode
 * number within that category: the category is placed above the low
 * NOPC_BITS bits and the opcode number occupies those low bits.
 *
 * The category is scaled by multiplication rather than with '<<' because
 * the meta instructions use category -1, and left-shifting a negative
 * value is undefined behavior in C.  Both arguments are parenthesized so
 * that arbitrary expressions can be passed without precedence surprises.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
36 
/* Opcode identifiers.  Every value is built with _OPC(cat, opc): the
 * instruction category sits above the low NOPC_BITS bits and the opcode
 * number within that category occupies the low NOPC_BITS bits.  Use
 * opc_cat()/opc_op() to split a value back into those two parts.
 */
typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_BR              = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U           = _OPC(2, 48),
	OPC_MUL_S           = _OPC(2, 49),
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),        /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),        /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_STI             = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	/* 12-13 - no opcode listed here (NOTE(review): invalid or just unknown?) */
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB_TYPED_4D   = _OPC(6, 27),
	OPC_STGB_4D_4       = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC_4           = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	OPC_META_PHI        = _OPC(-1, 1),
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO         = _OPC(-1, 2),
	OPC_META_FI         = _OPC(-1, 3),

} opc_t;
211 
/* Split a value built with _OPC() back into its two parts:
 *   opc_cat() - the instruction category, as an int
 *   opc_op()  - the opcode number within the category (low NOPC_BITS bits),
 *               as an unsigned
 *
 * NOTE: for the meta opcodes (category -1) opc_cat() right-shifts a
 * negative value, whose result is implementation-defined; it yields -1
 * only where '>>' on signed values is an arithmetic shift.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
214 
/* Data type codes.  The names give the kind (F = float, U = unsigned int,
 * S = signed int) and the width in bits; see type_size(), type_float(),
 * type_uint() and type_sint() for the matching queries.  All eight values
 * fit in three bits.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;
225 
/* Width, in bits, of a value of the given type (8, 16 or 32).
 *
 * An unrecognized type trips the assert; in builds where assert() is
 * compiled out, 0 is returned instead.
 */
static inline uint32_t type_size(type_t type)
{
	uint32_t bits = 0;

	switch (type) {
	case TYPE_U8:
	case TYPE_S8:
		bits = 8;
		break;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		bits = 16;
		break;
	case TYPE_F32:
	case TYPE_U32:
	case TYPE_S32:
		bits = 32;
		break;
	default:
		assert(0); /* invalid type */
		break;
	}

	return bits;
}
245 
/* Returns 1 if type is one of the floating point types (F16/F32), else 0. */
static inline int type_float(type_t type)
{
	switch (type) {
	case TYPE_F16:
	case TYPE_F32:
		return 1;
	default:
		return 0;
	}
}
250 
/* Returns 1 if type is one of the unsigned integer types (U8/U16/U32),
 * else 0.
 */
static inline int type_uint(type_t type)
{
	switch (type) {
	case TYPE_U8:
	case TYPE_U16:
	case TYPE_U32:
		return 1;
	default:
		return 0;
	}
}
255 
/* Returns 1 if type is one of the signed integer types (S8/S16/S32),
 * else 0.
 */
static inline int type_sint(type_t type)
{
	switch (type) {
	case TYPE_S8:
	case TYPE_S16:
	case TYPE_S32:
		return 1;
	default:
		return 0;
	}
}
260 
/* A register operand value, viewable in several ways:
 *   - comp/num: a register reference split into a 2-bit component (low
 *     bits) and a 10-bit register number above it
 *   - iim_val:  a signed 11-bit immediate
 *   - dummy*:   raw views of various widths (8..13 bits, and the full
 *     32 bits)
 */
typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;
		uint32_t num   : 10;
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	int32_t  idummy10  : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
} reg_t;
278 
279 /* special registers: */
280 #define REG_A0 61       /* address register */
281 #define REG_P0 62       /* predicate register */
282 
reg_special(reg_t reg)283 static inline int reg_special(reg_t reg)
284 {
285 	return (reg.num == REG_A0) || (reg.num == REG_P0);
286 }
287 
/* Encoding of a category 0 instruction (two 32-bit dwords).
 *
 * dword0 carries an immediate whose width depends on the GPU generation:
 * signed 16 bits (a3xx), signed 20 bits (a4xx) or a full unsigned 32 bits
 * (a5xx).  dword1 carries the flags, the 4-bit opcode and the common
 * jmp_tgt/sync/opc_cat fields in its top bits.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			uint32_t immed    : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;
317 
/* Encoding of a category 1 instruction (two 32-bit dwords).
 *
 * dword0 is the source operand, in one of three forms: a plain register
 * (src), an address-relative reference (off + src_rel/src_rel_c flags), or
 * a 32-bit immediate readable as signed, unsigned or float.  dword1 holds
 * the destination, flags and the common jmp_tgt/sync/opc_cat fields.
 * There is no opc field in this category.
 *
 * NOTE(review): dst_type/src_type are 3 bits wide, presumably holding
 * type_t values - confirm against the users of this struct.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like a address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;
360 
/* Encoding of a category 2 instruction (two 32-bit dwords).
 *
 * dword0 is made of two 16-bit source descriptors (src1 in the low half,
 * src2 in the high half).  Each descriptor is a union offering three
 * views of the same 16 bits: the default view (register plus
 * immediate/negate/abs flags), a relative-addressing view (rel1/rel2)
 * and a const view (c1/c2).  dword1 holds the destination, flags, the
 * 6-bit opcode and the common jmp_tgt/sync/opc_cat fields.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;
423 
/* Encoding of a category 3 (three-source) instruction (two 32-bit dwords).
 *
 * dword0 is made of two 16-bit descriptors: src1 in the low half and src3
 * in the high half, each with default / relative (rel1, rel2) / const
 * (c1, c2) views of the same bits.  Note that several src2 flags
 * (src2_c, src2_r, src2_neg) live in those dword0 descriptors, while the
 * 8-bit src2 register itself is in dword1 next to the 4-bit opcode.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;
483 
instr_cat3_full(instr_cat3_t * cat3)484 static inline bool instr_cat3_full(instr_cat3_t *cat3)
485 {
486 	switch (_OPC(3, cat3->opc)) {
487 	case OPC_MAD_F16:
488 	case OPC_MAD_U16:
489 	case OPC_MAD_S16:
490 	case OPC_SEL_B16:
491 	case OPC_SEL_S16:
492 	case OPC_SEL_F16:
493 	case OPC_SAD_S16:
494 	case OPC_SAD_S32:  // really??
495 		return false;
496 	default:
497 		return true;
498 	}
499 }
500 
/* Encoding of a category 4 (single-source) instruction (two 32-bit dwords).
 *
 * The low 16 bits of dword0 describe the one source operand, with default
 * / relative (rel) / const (c) views of the same bits; the high 16 bits
 * (dummy1) are noted as ignored.  dword1 holds the destination, flags,
 * the 6-bit opcode and the common jmp_tgt/sync/opc_cat fields.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;
540 
/* Encoding of a category 5 instruction (two 32-bit dwords).
 *
 * dword0 has two layouts, "norm" and "s2en", which share the leading
 * full bit and src1 field (also exposed through the anonymous struct)
 * and differ in the remaining bits: norm carries src2 plus samp/tex
 * indices, s2en carries a wider src2 plus src3.
 * NOTE(review): presumably the is_s2en flag in dword1 selects between the
 * two layouts - confirm against the encoder/decoder.
 *
 * dword1 holds the destination, write-mask, type, the is_* flags, the
 * 5-bit opcode and the common jmp_tgt/sync/opc_cat fields.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;
589 
/* dword0 encoding for src_off: [src1 + off], src2:
 *
 * Bit 0 (mustbe1) is the same bit that instr_cat6_t exposes as src_off.
 * The offset is a signed 13-bit field.  dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
603 
/* dword0 encoding for !src_off: [src1], src2
 *
 * Bit 0 (mustbe0) is the same bit that instr_cat6_t exposes as src_off.
 * Without an offset, src1 takes the 13 bits that hold the offset in
 * instr_cat6a_t, and the following 8 bits are ignored.  dword1 is left
 * opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
617 
/* dword1 encoding for dst_off:
 *
 * dword0 is left opaque here.  In dword1, bit 8 (mustbe1) is the same bit
 * that instr_cat6_t exposes as dst_off; the signed 8-bit offset sits below
 * it and the destination above it.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t off       : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
634 
/* dword1 encoding for !dst_off:
 *
 * dword0 is left opaque here.  In dword1, the destination occupies the low
 * 8 bits and bit 8 (mustbe0) is the same bit that instr_cat6_t exposes as
 * dst_off.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t pad0     : 23;
} instr_cat6d_t;
644 
645 /* I think some of the other cat6 instructions use additional
646  * sub-encodings..
647  */
648 
/* Encoding of a category 6 instruction (two 32-bit dwords), as a union of
 * the sub-encodings plus a view of the fields they all share.
 *
 * The src_off bit (bit 0 of dword0) tells whether dword0 follows the 'a'
 * (with offset) or 'b' (no offset) layout, and the dst_off bit (bit 8 of
 * dword1) tells whether dword1 follows the 'c' (with offset) or 'd' (no
 * offset) layout.  The anonymous struct also exposes type, the 5-bit
 * opcode and the common jmp_tgt/sync/opc_cat fields.
 */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;  /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;
672 
/* A complete 64-bit instruction: a union of the per-category encodings
 * plus an anonymous struct exposing the fields found at the same bit
 * position in several categories.  opc_cat (the top 3 bits) tells which
 * of the catN views applies; see instr_opc().
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	struct PACKED {
		/* pad1 skips all of dword0 plus the low 8 bits of dword1;
		 * the remaining fields are the upper 24 bits of dword1:
		 */
		uint64_t pad1     : 40;
		uint32_t repeat   : 3;  /* cat0-cat4 */
		uint32_t pad2     : 1;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;
695 
instr_opc(instr_t * instr)696 static inline uint32_t instr_opc(instr_t *instr)
697 {
698 	switch (instr->opc_cat) {
699 	case 0:  return instr->cat0.opc;
700 	case 1:  return 0;
701 	case 2:  return instr->cat2.opc;
702 	case 3:  return instr->cat3.opc;
703 	case 4:  return instr->cat4.opc;
704 	case 5:  return instr->cat5.opc;
705 	case 6:  return instr->cat6.opc;
706 	default: return 0;
707 	}
708 }
709 
/* True if opc is one of the mad opcodes (mad.u16, mad.s16, mad.u24,
 * mad.s24, mad.f16 or mad.f32).  The madsh opcodes are not included; see
 * is_madsh() for those.
 */
static inline bool is_mad(opc_t opc)
{
	return (opc == OPC_MAD_U16) ||
	       (opc == OPC_MAD_S16) ||
	       (opc == OPC_MAD_U24) ||
	       (opc == OPC_MAD_S24) ||
	       (opc == OPC_MAD_F16) ||
	       (opc == OPC_MAD_F32);
}
724 
/* True if opc is one of the madsh opcodes (madsh.u16 or madsh.m16). */
static inline bool is_madsh(opc_t opc)
{
	return (opc == OPC_MADSH_U16) || (opc == OPC_MADSH_M16);
}
735 
736 #endif /* INSTR_A3XX_H_ */
737