1 /*
2  * Southern Islands Register documentation
3  *
4  * Copyright (C) 2011  Advanced Micro Devices, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef SID_H
25 #define SID_H
26 
27 #include "amdgfxregs.h"
28 
/* Register address ranges. Every range is described by a start (*_OFFSET)
 * and an end (*_END); the range sizes further down are computed as
 * END - OFFSET. The UCONFIG perf range lies inside the UCONFIG range.
 */
#define SI_CONFIG_REG_OFFSET       0x08000
#define SI_CONFIG_REG_END          0x0B000
#define SI_SH_REG_OFFSET           0x0B000
#define SI_SH_REG_END              0x0C000
#define SI_CONTEXT_REG_OFFSET      0x28000
#define SI_CONTEXT_REG_END         0x30000
#define CIK_UCONFIG_REG_OFFSET     0x30000
#define CIK_UCONFIG_REG_END        0x40000
#define SI_UCONFIG_PERF_REG_OFFSET 0x34000
#define SI_UCONFIG_PERF_REG_END    0x38000

/* Register shadowing: span of each register range. */
#define SI_SH_REG_SPACE_SIZE           (SI_SH_REG_END - SI_SH_REG_OFFSET)
#define SI_CONTEXT_REG_SPACE_SIZE      (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
#define SI_UCONFIG_REG_SPACE_SIZE      (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)

/* Register shadowing: the SH, context and UCONFIG ranges are laid out back
 * to back in that order. Each offset is the end of the previous range, and
 * the buffer size is the end of the last one.
 */
#define SI_SHADOWED_SH_REG_OFFSET      0
#define SI_SHADOWED_CONTEXT_REG_OFFSET (SI_SHADOWED_SH_REG_OFFSET + SI_SH_REG_SPACE_SIZE)
#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SHADOWED_CONTEXT_REG_OFFSET + SI_CONTEXT_REG_SPACE_SIZE)
#define SI_SHADOWED_REG_BUFFER_SIZE    (SI_SHADOWED_UCONFIG_REG_OFFSET + SI_UCONFIG_REG_SPACE_SIZE)
52 
/* Event codes to pass through EVENT_TYPE(). */
#define EVENT_TYPE_CACHE_FLUSH                  0x06
#define EVENT_TYPE_PS_PARTIAL_FLUSH             0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE                   0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT    0x16
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH        0x1F
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS        0x20

/* Field encoders: EVENT_TYPE() leaves its argument in the low bits (shift
 * of 0), EVENT_INDEX() moves its argument up by 8 bits.
 *
 * EVENT_INDEX values:
 *   0 - any non-TS event
 *   1 - ZPASS_DONE
 *   2 - SAMPLE_PIPELINESTAT
 *   3 - SAMPLE_STREAMOUTSTAT*
 *   4 - *S_PARTIAL_FLUSH
 *   5 - TS events
 */
#define EVENT_TYPE(type)   ((type) << 0)
#define EVENT_INDEX(index) ((index) << 8)
69 
/* Single-bit action enables for EVENT_WRITE_EOP (SI-VI) and RELEASE_MEM
 * (GFX9), written out as masks.
 */
#define EVENT_TCL1_VOL_ACTION_ENA 0x00001000 /* bit 12 */
#define EVENT_TC_VOL_ACTION_ENA   0x00002000 /* bit 13 */
#define EVENT_TC_WB_ACTION_ENA    0x00008000 /* bit 15 */
#define EVENT_TCL1_ACTION_ENA     0x00010000 /* bit 16 */
#define EVENT_TC_ACTION_ENA       0x00020000 /* bit 17 */
#define EVENT_TC_NC_ACTION_ENA    0x00080000 /* bit 19, GFX9+ */
#define EVENT_TC_WC_ACTION_ENA    0x00100000 /* bit 20, GFX9+ */
#define EVENT_TC_MD_ACTION_ENA    0x00200000 /* bit 21, GFX9+ */
79 
/* Predication operation codes; encode with PRED_OP(). */
#define PREDICATION_OP_CLEAR     0x0
#define PREDICATION_OP_ZPASS     0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
#define PREDICATION_OP_BOOL64    0x3

/* Places a PREDICATION_OP_* code at bit 16. */
#define PRED_OP(x) ((x) << 16)

/* Bit 31. Built from an unsigned 1: shifting a signed int 1 into the sign
 * bit is undefined behavior in C, and the negative value compilers produce
 * for it sign-extends when widened to a 64-bit type.
 */
#define PREDICATION_CONTINUE (1u << 31)

/* Bit 12: hint selection. */
#define PREDICATION_HINT_WAIT        (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)

/* Bit 8: draw-visible selection. */
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
#define PREDICATION_DRAW_VISIBLE     (1 << 8)

#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
96 
/* None of the registers defined in this packet section actually exist. The
 * definitions are here only to describe the packet encoding that the IB
 * parser understands, and to serve as accurate documentation.
 */
#define PKT3_NOP               0x10
#define PKT3_SET_BASE          0x11
#define PKT3_CLEAR_STATE       0x12
#define PKT3_INDEX_BUFFER_SIZE 0x13
#define PKT3_DISPATCH_DIRECT   0x15
#define PKT3_DISPATCH_INDIRECT 0x16
#define PKT3_OCCLUSION_QUERY   0x1f /* new for CIK */
#define PKT3_SET_PREDICATION   0x20
#define PKT3_COND_EXEC         0x22
#define PKT3_PRED_EXEC         0x23
#define PKT3_DRAW_INDIRECT     0x24
#define PKT3_DRAW_INDEX_INDIRECT 0x25
#define PKT3_INDEX_BASE        0x26
#define PKT3_DRAW_INDEX_2      0x27
#define PKT3_CONTEXT_CONTROL   0x28
/* CC0_* encoders: each keeps bit 0 of its argument, converted to unsigned,
 * and shifts it to the bit position given on the right.
 */
#define CC0_LOAD_GLOBAL_CONFIG(en)       (((unsigned)(en) & 0x1) << 0)
#define CC0_LOAD_PER_CONTEXT_STATE(en)   (((unsigned)(en) & 0x1) << 1)
#define CC0_LOAD_GLOBAL_UCONFIG(en)      (((unsigned)(en) & 0x1) << 15)
#define CC0_LOAD_GFX_SH_REGS(en)         (((unsigned)(en) & 0x1) << 16)
#define CC0_LOAD_CS_SH_REGS(en)          (((unsigned)(en) & 0x1) << 24)
#define CC0_LOAD_CE_RAM(en)              (((unsigned)(en) & 0x1) << 28)
#define CC0_UPDATE_LOAD_ENABLES(en)      (((unsigned)(en) & 0x1) << 31)
/* CC1_* encoders: same scheme as CC0_*. */
#define CC1_SHADOW_GLOBAL_CONFIG(en)     (((unsigned)(en) & 0x1) << 0)
#define CC1_SHADOW_PER_CONTEXT_STATE(en) (((unsigned)(en) & 0x1) << 1)
#define CC1_SHADOW_GLOBAL_UCONFIG(en)    (((unsigned)(en) & 0x1) << 15)
#define CC1_SHADOW_GFX_SH_REGS(en)       (((unsigned)(en) & 0x1) << 16)
#define CC1_SHADOW_CS_SH_REGS(en)        (((unsigned)(en) & 0x1) << 24)
#define CC1_UPDATE_SHADOW_ENABLES(en)    (((unsigned)(en) & 0x1) << 31)
#define PKT3_INDEX_TYPE          0x2a /* not on GFX9 */
#define PKT3_DRAW_INDIRECT_MULTI 0x2c
#define R_2C3_DRAW_INDEX_LOC     0x2c3
/* S_2C3_* encoders: bit 0 of the argument, placed at bit 30 / bit 31. */
#define S_2C3_COUNT_INDIRECT_ENABLE(v) (((unsigned)(v) & 0x1) << 30)
#define S_2C3_DRAW_INDEX_ENABLE(v)     (((unsigned)(v) & 0x1) << 31)
#define PKT3_DRAW_INDEX_AUTO       0x2d
#define PKT3_DRAW_INDEX_IMMD       0x2e /* not on CIK */
#define PKT3_NUM_INSTANCES         0x2f
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
#define PKT3_INDIRECT_BUFFER_SI    0x32 /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CONST 0x33
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
/* STRMOUT_* helpers: a flag in bit 0, a 2-bit offset source at bits [2:1]
 * with its four values, a 1-bit data type at bit 7 and a 2-bit buffer
 * select at bits [9:8].
 */
#define STRMOUT_STORE_BUFFER_FILLED_SIZE    1
#define STRMOUT_OFFSET_SOURCE(v)            (((unsigned)(v) & 0x3) << 1)
#define STRMOUT_OFFSET_FROM_PACKET          0
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM             2
#define STRMOUT_OFFSET_NONE                 3
#define STRMOUT_DATA_TYPE(v)                (((unsigned)(v) & 0x1) << 7)
#define STRMOUT_SELECT_BUFFER(v)            (((unsigned)(v) & 0x3) << 8)
#define PKT3_DRAW_INDEX_OFFSET_2       0x35
#define PKT3_WRITE_DATA                0x37
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PKT3_MEM_SEMAPHORE             0x39
#define PKT3_MPEG_INDEX                0x3a /* not on CIK */
#define PKT3_WAIT_REG_MEM              0x3c
/* WAIT_REG_MEM_* helpers: three comparison codes, a 2-bit memory-space
 * field at bits [5:4], and a flag at bit 8.
 */
#define WAIT_REG_MEM_EQUAL            3
#define WAIT_REG_MEM_NOT_EQUAL        4
#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
#define WAIT_REG_MEM_MEM_SPACE(v)     (((unsigned)(v) & 0x3) << 4)
#define WAIT_REG_MEM_PFP              0x100 /* bit 8 */
#define PKT3_MEM_WRITE           0x3d /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CIK 0x3f /* new on CIK */
162 
#define PKT3_COPY_DATA                         0x40
/* Source selector: keeps the low 4 bits of x (bits [3:0]). */
#define COPY_DATA_SRC_SEL(x)                   ((x)&0xf)
#define COPY_DATA_REG                          0
#define COPY_DATA_SRC_MEM                      1 /* only valid as source */
#define COPY_DATA_TC_L2                        2
#define COPY_DATA_GDS                          3
#define COPY_DATA_PERF                         4
#define COPY_DATA_IMM                          5
#define COPY_DATA_TIMESTAMP                    9
/* Destination selector: low 4 bits of x placed at bits [11:8].
 * Selector values 2-4 (COPY_DATA_TC_L2, COPY_DATA_GDS, COPY_DATA_PERF) are
 * the same for source and destination, so they are defined only once, in
 * the source list above.
 */
#define COPY_DATA_DST_SEL(x)                   (((unsigned)(x)&0xf) << 8)
#define COPY_DATA_DST_MEM_GRBM                 1 /* sync across GRBM, deprecated */
#define COPY_DATA_DST_MEM                      5
#define COPY_DATA_COUNT_SEL                    (1 << 16)
#define COPY_DATA_WR_CONFIRM                   (1 << 20)
#define COPY_DATA_ENGINE_PFP                   (1 << 30)
#define PKT3_PFP_SYNC_ME                       0x42
#define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
#define PKT3_COND_WRITE                        0x45
#define PKT3_EVENT_WRITE                       0x46
#define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
/* EOP_* field encoders. The argument is converted to unsigned before the
 * shift, like the other encoders in this file: with a plain int,
 * EOP_DATA_SEL(4) and above (e.g. EOP_DATA_SEL_GDS) would shift into the
 * sign bit, which is undefined behavior and yields a value that sign-extends
 * when widened to 64 bits.
 */
#define EOP_DST_SEL(x)                         ((unsigned)(x) << 16)
#define EOP_DST_SEL_MEM                        0
#define EOP_DST_SEL_TC_L2                      1
#define EOP_INT_SEL(x)                         ((unsigned)(x) << 24)
#define EOP_INT_SEL_NONE                       0
#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
#define EOP_DATA_SEL(x)                        ((unsigned)(x) << 29)
#define EOP_DATA_SEL_DISCARD                   0
#define EOP_DATA_SEL_VALUE_32BIT               1
#define EOP_DATA_SEL_VALUE_64BIT               2
#define EOP_DATA_SEL_TIMESTAMP                 3
#define EOP_DATA_SEL_GDS                       5
/* Packs a dword offset into the low bits and a dword count at bit 16. */
#define EOP_DATA_GDS(dw_offset, num_dwords)    ((dw_offset) | ((unsigned)(num_dwords) << 16))
/* PKT3_EVENT_WRITE_EOS (0x48, not on GFX9) is intentionally left undefined
 * because of a CP DMA bug: whenever EOS packets are used, CP_DMA.DST_SEL=TC
 * must not be used at all. Use DST_SEL=MC instead; for prefetch, use
 * SRC_SEL=TC together with DST_SEL=MC. Only CIK chips are affected.
 *
 * Fix CP DMA before enabling this definition:
 */
/*#define PKT3_EVENT_WRITE_EOS 0x48*/
#define PKT3_RELEASE_MEM            0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
#define PKT3_CONTEXT_REG_RMW        0x51 /* older firmware versions on older chips don't have this */
#define PKT3_ONE_REG_WRITE          0x57 /* not on CIK */
#define PKT3_ACQUIRE_MEM            0x58 /* new for CIK */
#define PKT3_REWIND                 0x59 /* VI+ [any ring] or CIK [compute ring only] */

/* Register load packets */
#define PKT3_LOAD_UCONFIG_REG       0x5e /* GFX7+ */
#define PKT3_LOAD_SH_REG            0x5f
#define PKT3_LOAD_CONTEXT_REG       0x61

/* Register set packets */
#define PKT3_SET_CONFIG_REG         0x68
#define PKT3_SET_CONTEXT_REG        0x69
#define PKT3_SET_SH_REG             0x76
#define PKT3_SET_SH_REG_OFFSET      0x77
#define PKT3_SET_UCONFIG_REG        0x79 /* new for CIK */
#define PKT3_SET_UCONFIG_REG_INDEX  0x7a /* new for GFX9, CP ucode version >= 26 */

/* Const RAM and CE/DE counter packets */
#define PKT3_LOAD_CONST_RAM         0x80
#define PKT3_WRITE_CONST_RAM        0x81
#define PKT3_DUMP_CONST_RAM         0x83
#define PKT3_INCREMENT_CE_COUNTER   0x84
#define PKT3_INCREMENT_DE_COUNTER   0x85
#define PKT3_WAIT_ON_CE_COUNTER     0x86

#define PKT3_SET_SH_REG_INDEX       0x9b
#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9f /* new for VI */
228 
/* Packet header fields. For each field, *_S(v) masks v and shifts it into
 * place, *_G(hdr) extracts the field from a header value, and *_C is the
 * mask that clears the field.
 */

/* Packet type: 2 bits at [31:30]. */
#define PKT_TYPE_S(v)         (((unsigned)(v) & 0x3) << 30)
#define PKT_TYPE_G(hdr)       (((hdr) >> 30) & 0x3)
#define PKT_TYPE_C            0x3FFFFFFF

/* Count: 14 bits at [29:16]. */
#define PKT_COUNT_S(v)        (((unsigned)(v) & 0x3FFF) << 16)
#define PKT_COUNT_G(hdr)      (((hdr) >> 16) & 0x3FFF)
#define PKT_COUNT_C           0xC000FFFF

/* Type-0 base index: 16 bits at [15:0]. */
#define PKT0_BASE_INDEX_S(v)  (((unsigned)(v) & 0xFFFF) << 0)
#define PKT0_BASE_INDEX_G(hdr) (((hdr) >> 0) & 0xFFFF)
#define PKT0_BASE_INDEX_C     0xFFFF0000

/* Type-3 opcode: 8 bits at [15:8]. */
#define PKT3_IT_OPCODE_S(v)   (((unsigned)(v) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(hdr) (((hdr) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C      0xFFFF00FF

/* Type-3 predicate (bit 0, so no shift is needed) and shader type (bit 1). */
#define PKT3_PREDICATE(v)     (((v) >> 0) & 0x1)
#define PKT3_SHADER_TYPE_S(v) (((unsigned)(v) & 0x1) << 1)

/* Complete headers, assembled by OR-ing the encoded fields together. */
#define PKT0(index, count)    (PKT_COUNT_S(count) | PKT0_BASE_INDEX_S(index) | PKT_TYPE_S(0))
#define PKT3(op, count, predicate)                                                                 \
   (PKT3_PREDICATE(predicate) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT_TYPE_S(3))

/* Padding headers: a bare type-2 header, and a type-3 NOP header with the
 * count field set to 0x3fff.
 */
#define PKT2_NOP_PAD PKT_TYPE_S(2)
#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
249 
#define PKT3_CP_DMA 0x41
/* CP_DMA packet layout, one entry per dword:
 *   1. header
 *   2. SRC_ADDR_LO [31:0] or DATA [31:0]
 *   3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
 *   4. DST_ADDR_LO [31:0]
 *   5. DST_ADDR_HI [15:0]
 *   6. COMMAND [29:22] | BYTE_COUNT [20:0]
 */

#define PKT3_DMA_DATA 0x50 /* introduced with CIK */
/* 1. header
 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
 * 3. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [31:0]
 * 7. COMMAND [29:22] | BYTE_COUNT [20:0]
 */
268 
/* SI async DMA packets.
 *
 * SI_DMA_PACKET() builds a header dword: cmd in bits [31:28], sub_cmd in
 * bits [27:20] and n in bits [19:0]. Each argument is converted to unsigned
 * and masked to its field width before being shifted into place.
 */
#define SI_DMA_PACKET(cmd, sub_cmd, n)                                                             \
   ((((unsigned)(n) & 0xFFFFF) << 0) | (((unsigned)(sub_cmd) & 0xFF) << 20) |                      \
    (((unsigned)(cmd) & 0xF) << 28))

/* SI async DMA packet types */
#define SI_DMA_PACKET_WRITE           0x2
#define SI_DMA_PACKET_COPY            0x3
#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
#define SI_DMA_PACKET_SEMAPHORE       0x5
#define SI_DMA_PACKET_FENCE           0x6
#define SI_DMA_PACKET_TRAP            0x7
#define SI_DMA_PACKET_SRBM_WRITE      0x9
#define SI_DMA_PACKET_CONSTANT_FILL   0xD
#define SI_DMA_PACKET_NOP             0xF

/* SI_DMA_PACKET_COPY variants */
#define SI_DMA_COPY_DWORD_ALIGNED 0x00
#define SI_DMA_COPY_BYTE_ALIGNED  0x40
#define SI_DMA_COPY_TILED         0x08

/* SI_DMA_PACKET_COPY size limits. According to the documentation the
 * maximum dword-aligned size is 0xffff8 dwords, i.e. 0x3fffe0 bytes.
 */
#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE  0xFFFE0
#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3FFFE0
290 
/* CIK async DMA packets.
 *
 * CIK_SDMA_PACKET() builds a header dword: op in bits [7:0], sub_op in bits
 * [15:8] and n in bits [31:16]. Each argument is converted to unsigned and
 * masked to its field width before being shifted into place.
 */
#define CIK_SDMA_PACKET(op, sub_op, n)                                                             \
   ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) |                            \
    (((unsigned)(op)&0xFF) << 0))
/* CIK async DMA packet types; sub-opcodes are listed under their opcode. */
#define CIK_SDMA_OPCODE_NOP                        0x0
#define CIK_SDMA_OPCODE_COPY                       0x1
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
#define CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
#define CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
#define CIK_SDMA_OPCODE_WRITE                      0x2
#define SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
#define SDMA_WRITE_SUB_OPCODE_TILED                0x1
/* Misspelled legacy name, kept as an alias so existing users keep building. */
#define SDMA_WRTIE_SUB_OPCODE_TILED                SDMA_WRITE_SUB_OPCODE_TILED
#define CIK_SDMA_OPCODE_INDIRECT_BUFFER            0x4
#define CIK_SDMA_PACKET_FENCE                      0x5
#define CIK_SDMA_PACKET_TRAP                       0x6
#define CIK_SDMA_PACKET_SEMAPHORE                  0x7
#define CIK_SDMA_PACKET_CONSTANT_FILL              0xb
#define CIK_SDMA_OPCODE_TIMESTAMP                  0xd
#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
#define CIK_SDMA_PACKET_SRBM_WRITE                 0xe
/* There is apparently an undocumented HW limitation that prevents the HW
 * from copying the last 255 bytes of (1 << 22) - 1, hence the limit of
 * (1 << 22) - 256 bytes.
 */
#define CIK_SDMA_COPY_MAX_SIZE    0x3fff00   /* almost 4 MB */
#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
321 
/* Floating-point value classes as bit flags. Every enumerator is a distinct
 * single bit (bits 0 through 9), so classes can be OR-ed into a mask.
 */
enum amd_cmp_class_flags
{
   /* NaNs */
   S_NAN = 0x001, /* signaling NaN */
   Q_NAN = 0x002, /* quiet NaN */

   /* negative values */
   N_INFINITY = 0x004,  /* negative infinity */
   N_NORMAL = 0x008,    /* negative normal */
   N_SUBNORMAL = 0x010, /* negative subnormal */
   N_ZERO = 0x020,      /* negative zero */

   /* positive values */
   P_ZERO = 0x040,      /* positive zero */
   P_SUBNORMAL = 0x080, /* positive subnormal */
   P_NORMAL = 0x100,    /* positive normal */
   P_INFINITY = 0x200   /* positive infinity */
};
335 
#endif /* SID_H */
337