1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include <unistd.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "xf86drm.h"
#include <limits.h>
34 
/* Capacity of every buffer used to build a sysfs/debugfs file name. */
#define PATH_SIZE PATH_MAX

/* Number of elements in a real array; must not be used on a pointer. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
38 
/*
 * Names of the RAS blocks. The order matches enum amdgpu_ras_block, so an
 * enum value can be used directly as an index.
 */
const char *ras_block_string[] = {
	"umc", "sdma", "gfx", "mmhub", "athub", "pcie_bif", "hdp",	/*  0 ..  6 */
	"xgmi_wafl", "df", "smn", "sem", "mp0", "mp1", "fuse",		/*  7 .. 13 */
};

/* Name of RAS block @i; the index is not range-checked. */
#define ras_block_str(i) (ras_block_string[i])
57 
/*
 * Identifiers of the hardware blocks that can report RAS errors. The value
 * of each enumerator is also its bit position in the block masks below.
 */
enum amdgpu_ras_block {
	AMDGPU_RAS_BLOCK__UMC		= 0,
	AMDGPU_RAS_BLOCK__SDMA		= 1,
	AMDGPU_RAS_BLOCK__GFX		= 2,
	AMDGPU_RAS_BLOCK__MMHUB		= 3,
	AMDGPU_RAS_BLOCK__ATHUB		= 4,
	AMDGPU_RAS_BLOCK__PCIE_BIF	= 5,
	AMDGPU_RAS_BLOCK__HDP		= 6,
	AMDGPU_RAS_BLOCK__XGMI_WAFL	= 7,
	AMDGPU_RAS_BLOCK__DF		= 8,
	AMDGPU_RAS_BLOCK__SMN		= 9,
	AMDGPU_RAS_BLOCK__SEM		= 10,
	AMDGPU_RAS_BLOCK__MP0		= 11,
	AMDGPU_RAS_BLOCK__MP1		= 12,
	AMDGPU_RAS_BLOCK__FUSE		= 13,

	/* One past the last valid block. */
	AMDGPU_RAS_BLOCK__LAST
};

/* Number of blocks, and a mask with one bit set for each of them. */
#define AMDGPU_RAS_BLOCK_COUNT  AMDGPU_RAS_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK   ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
79 
/*
 * Sub-block indices inside the GFX block. Every group is bracketed by
 * *_INDEX_START / *_INDEX_END aliases of its first and last member; groups
 * that are split into several ranges get numbered INDEXn aliases as well.
 * Enumerator order defines the numeric values and must not be changed.
 */
enum amdgpu_ras_gfx_subblock {
	/* CPC: 0 .. 7 */
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,

	/* CPF: 8 .. 10 */
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,

	/* CPG: 11 .. 13 */
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,

	/* GDS: 14 .. 18 */
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,

	/* SPI: 19 */
	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,

	/* SQ: 20 .. 23 */
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,

	/* SQC, three ranges: 24 .. 48 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0: 24 .. 30 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1: 31 .. 39 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2: 40 .. 48 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,

	/* TA: 49 .. 53 */
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,

	/* TCA: 54 .. 55 */
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,

	/* TCC, five ranges: 56 .. 77 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0: 56 .. 63 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1: 64 .. 65 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2: 66 .. 73 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3: 74 .. 75 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4: 76 .. 77 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,

	/* TCI: 78 */
	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,

	/* TCP: 79 .. 85 */
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,

	/* TD: 86 .. 88 */
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,

	/* EA, three ranges: 89 .. 107 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0: 89 .. 96 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1: 97 .. 103 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2: 104 .. 107 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,

	/* UTC VM L2 bank: 108 */
	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker: 109 */
	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache: 110 */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache: 111 */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,

	/* Number of GFX sub-block indices. */
	AMDGPU_RAS_BLOCK__GFX_MAX
};
309 
/* Kinds of RAS error; every kind other than NONE occupies its own bit. */
enum amdgpu_ras_error_type {
	AMDGPU_RAS_ERROR__NONE			= 0,
	AMDGPU_RAS_ERROR__PARITY		= 1 << 0,
	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE	= 1 << 1,
	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE	= 1 << 2,
	AMDGPU_RAS_ERROR__POISON		= 1 << 3,
};
317 
318 struct ras_inject_test_config {
319 	char name[64];
320 	char block[32];
321 	int sub_block;
322 	enum amdgpu_ras_error_type type;
323 	uint64_t address;
324 	uint64_t value;
325 };
326 
327 struct ras_common_if {
328 	enum amdgpu_ras_block block;
329 	enum amdgpu_ras_error_type type;
330 	uint32_t sub_block_index;
331 	char name[32];
332 };
333 
334 struct ras_inject_if {
335 	struct ras_common_if head;
336 	uint64_t address;
337 	uint64_t value;
338 };
339 
340 struct ras_debug_if {
341 	union {
342 		struct ras_common_if head;
343 		struct ras_inject_if inject;
344 	};
345 	int op;
346 };
347 /* for now, only umc, gfx, sdma has implemented. */
348 #define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\
349 		(1 << AMDGPU_RAS_BLOCK__GFX))
350 #define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\
351 		(1 << AMDGPU_RAS_BLOCK__GFX))
352 #define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\
353 		(1 << AMDGPU_RAS_BLOCK__SDMA) |\
354 		(1 << AMDGPU_RAS_BLOCK__GFX))
355 
356 static uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT;
357 static uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT;
358 static uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC;
359 
/* Per-device block masks, one for each test family. */
struct ras_test_mask {
	uint32_t inject_mask;	/* blocks for the inject test */
	uint32_t query_mask;	/* blocks for the query test */
	uint32_t basic_mask;	/* blocks for the basic test */
};
365 
366 struct amdgpu_ras_data {
367 	amdgpu_device_handle device_handle;
368 	uint32_t  id;
369 	uint32_t  capability;
370 	struct ras_test_mask test_mask;
371 };
372 
373 /* all devices who has ras supported */
374 static struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED];
375 static int devices_count;
376 
377 struct ras_DID_test_mask{
378 	uint16_t device_id;
379 	uint16_t revision_id;
380 	struct ras_test_mask test_mask;
381 };
382 
/*
 * Brace initialisers for struct ras_test_mask, in field order
 * { inject, query, basic }.
 */

/* White-listed devices: inject, query and basic tests all enabled. */
#define RAS_BLOCK_MASK_ALL {\
	DEFAULT_RAS_BLOCK_MASK_INJECT,\
	DEFAULT_RAS_BLOCK_MASK_QUERY,\
	DEFAULT_RAS_BLOCK_MASK_BASIC\
}

/* Any other device: empty inject mask, query and basic tests only. */
#define RAS_BLOCK_MASK_QUERY_BASIC {\
	0,\
	DEFAULT_RAS_BLOCK_MASK_QUERY,\
	DEFAULT_RAS_BLOCK_MASK_BASIC\
}
395 
396 static const struct ras_inject_test_config umc_ras_inject_test[] = {
397 	{"ras_umc.1.0", "umc", 0, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
398 };
399 
400 static const struct ras_inject_test_config gfx_ras_inject_test[] = {
401 	{"ras_gfx.2.0", "gfx", AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
402 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
403 	{"ras_gfx.2.1", "gfx", AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
404 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
405 	{"ras_gfx.2.2", "gfx", AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
406 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
407 	{"ras_gfx.2.3", "gfx", AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
408 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
409 	{"ras_gfx.2.4", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
410 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
411 	{"ras_gfx.2.5", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM,
412 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
413 	{"ras_gfx.2.6", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM,
414 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
415 	{"ras_gfx.2.7", "gfx", AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO,
416 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
417 	{"ras_gfx.2.8", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA,
418 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
419 	{"ras_gfx.2.9", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
420 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
421 	{"ras_gfx.2.10", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
422 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
423 	{"ras_gfx.2.11", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
424 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
425 	{"ras_gfx.2.12", "gfx", AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM,
426 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
427 	{"ras_gfx.2.13", "gfx", AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO,
428 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
429 	{"ras_gfx.2.14", "gfx", AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM,
430 		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
431 };
432 
433 static const struct ras_DID_test_mask ras_DID_array[] = {
434 	{0x66a1, 0x00, RAS_BLOCK_MASK_ALL},
435 	{0x66a1, 0x01, RAS_BLOCK_MASK_ALL},
436 	{0x66a1, 0x04, RAS_BLOCK_MASK_ALL},
437 };
438 
amdgpu_ras_find_block_id_by_name(const char * name)439 static uint32_t amdgpu_ras_find_block_id_by_name(const char *name)
440 {
441 	int i;
442 
443 	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
444 		if (strcmp(name, ras_block_string[i]) == 0)
445 			return i;
446 	}
447 
448 	return ARRAY_SIZE(ras_block_string);
449 }
450 
amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type)451 static char *amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type)
452 {
453 	switch (type) {
454 	case AMDGPU_RAS_ERROR__PARITY:
455 		return "parity";
456 	case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
457 		return "single_correctable";
458 	case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
459 		return "multi_uncorrectable";
460 	case AMDGPU_RAS_ERROR__POISON:
461 		return "poison";
462 	case AMDGPU_RAS_ERROR__NONE:
463 	default:
464 		return NULL;
465 	}
466 }
467 
/*
 * Choose the test masks for @device.
 *
 * Returns the masks of the ras_DID_array entry whose device and revision id
 * both match the PCI device info of @device; when no entry matches, returns
 * RAS_BLOCK_MASK_QUERY_BASIC (empty inject mask).
 */
static struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device)
{
	static const struct ras_test_mask default_test_mask = RAS_BLOCK_MASK_QUERY_BASIC;
	size_t i;

	/* ARRAY_SIZE + size_t index, consistent with the other table walks. */
	for (i = 0; i < ARRAY_SIZE(ras_DID_array); i++) {
		const struct ras_DID_test_mask *entry = &ras_DID_array[i];

		if (entry->device_id == device->deviceinfo.pci->device_id &&
				entry->revision_id == device->deviceinfo.pci->revision_id)
			return entry->test_mask;
	}
	return default_test_mask;
}
480 
/*
 * Ask the kernel which RAS blocks @device_handle supports.
 *
 * Issues an AMDGPU_INFO_RAS_ENABLED_FEATURES query with a 64-bit buffer and
 * returns the "supported_features" word of the reply, or 0 when the query
 * fails.
 */
static uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle)
{
	union {
		uint64_t feature_mask;
		struct {
			uint32_t enabled_features;
			uint32_t supported_features;
		};
	} ras_info = { 0 };

	if (amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
			      sizeof(ras_info), &ras_info) != 0)
		return 0;

	return ras_info.supported_features;
}
499 
500 static int get_file_contents(char *file, char *buf, int size);
501 
amdgpu_ras_lookup_id(drmDevicePtr device)502 static int amdgpu_ras_lookup_id(drmDevicePtr device)
503 {
504 	char path[PATH_SIZE];
505 	char str[128];
506 	drmPciBusInfo info;
507 	int i;
508 	int ret;
509 
510 	for (i = 0; i < MAX_CARDS_SUPPORTED; i++) {
511 		memset(str, 0, sizeof(str));
512 		memset(&info, 0, sizeof(info));
513 		snprintf(path, PATH_SIZE, "/sys/kernel/debug/dri/%d/name", i);
514 		if (get_file_contents(path, str, sizeof(str)) <= 0)
515 			continue;
516 
517 		ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx",
518 				&info.domain, &info.bus, &info.dev, &info.func);
519 		if (ret != 4)
520 			continue;
521 
522 		if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0)
523 				return i;
524 	}
525 	return -1;
526 }
527 
528 //helpers
529 
530 static int test_card;
531 static char sysfs_path[PATH_SIZE];
532 static char debugfs_path[PATH_SIZE];
533 static uint32_t ras_mask;
534 static amdgpu_device_handle device_handle;
535 
set_test_card(int card)536 static void set_test_card(int card)
537 {
538 	test_card = card;
539 	snprintf(sysfs_path, PATH_SIZE, "/sys/class/drm/card%d/device/ras/", devices[card].id);
540 	snprintf(debugfs_path, PATH_SIZE,  "/sys/kernel/debug/dri/%d/ras/", devices[card].id);
541 	ras_mask = devices[card].capability;
542 	device_handle = devices[card].device_handle;
543 	ras_block_mask_inject = devices[card].test_mask.inject_mask;
544 	ras_block_mask_query = devices[card].test_mask.query_mask;
545 	ras_block_mask_basic = devices[card].test_mask.basic_mask;
546 }
547 
get_ras_sysfs_root(void)548 static const char *get_ras_sysfs_root(void)
549 {
550 	return sysfs_path;
551 }
552 
get_ras_debugfs_root(void)553 static const char *get_ras_debugfs_root(void)
554 {
555 	return debugfs_path;
556 }
557 
/*
 * Write @size bytes from @buf to the existing file @file with one write().
 *
 * Returns what write() returned (this may be a short count or -1), or -1
 * when the file cannot be opened for writing.
 */
static int set_file_contents(char *file, char *buf, int size)
{
	int written;
	int handle = open(file, O_WRONLY);

	if (handle == -1)
		return -1;

	written = write(handle, buf, size);
	close(handle);

	return written;
}
568 
/*
 * Read up to @size bytes of @file into @buf with one read().
 *
 * The buffer is NOT NUL-terminated here. Returns what read() returned
 * (byte count, 0 at end of file, or -1), or -1 when the file cannot be
 * opened for reading.
 */
static int get_file_contents(char *file, char *buf, int size)
{
	int got;
	int handle = open(file, O_RDONLY);

	if (handle == -1)
		return -1;

	got = read(handle, buf, size);
	close(handle);

	return got;
}
579 
/*
 * Check that @file can be opened with the open(2) flags @flags.
 * Returns 0 when the open succeeds (the file is closed again at once)
 * and -1 otherwise.
 */
static int is_file_ok(char *file, int flags)
{
	int handle = open(file, flags);

	if (handle != -1) {
		close(handle);
		return 0;
	}

	return -1;
}
590 
amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)591 static int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)
592 {
593 	uint32_t feature_mask;
594 	int ret;
595 
596 	ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
597 			sizeof(feature_mask), &feature_mask);
598 	if (ret)
599 		return -1;
600 
601 	return (1 << block) & feature_mask;
602 }
603 
amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)604 static int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)
605 {
606 	return (1 << block) & ras_mask;
607 }
608 
amdgpu_ras_invoke(struct ras_debug_if * data)609 static int amdgpu_ras_invoke(struct ras_debug_if *data)
610 {
611 	char path[PATH_SIZE];
612 	int ret;
613 
614 	snprintf(path, sizeof(path), "%s", get_ras_debugfs_root());
615 	strncat(path, "ras_ctrl", sizeof(path) - strlen(path));
616 
617 	ret = set_file_contents(path, (char *)data, sizeof(*data))
618 		- sizeof(*data);
619 	return ret;
620 }
621 
amdgpu_ras_query_err_count(enum amdgpu_ras_block block,unsigned long * ue,unsigned long * ce)622 static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
623 		unsigned long *ue, unsigned long *ce)
624 {
625 	char buf[64];
626 	char name[PATH_SIZE];
627 
628 	*ue = *ce = 0;
629 
630 	if (amdgpu_ras_is_feature_supported(block) <= 0)
631 		return -1;
632 
633 	snprintf(name, sizeof(name), "%s", get_ras_sysfs_root());
634 	strncat(name, ras_block_str(block), sizeof(name) - strlen(name));
635 	strncat(name, "_err_count", sizeof(name) - strlen(name));
636 
637 	if (is_file_ok(name, O_RDONLY))
638 		return 0;
639 
640 	if (get_file_contents(name, buf, sizeof(buf)) <= 0)
641 		return -1;
642 
643 	if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2)
644 		return -1;
645 
646 	return 0;
647 }
648 
amdgpu_ras_inject(enum amdgpu_ras_block block,uint32_t sub_block,enum amdgpu_ras_error_type type,uint64_t address,uint64_t value)649 static int amdgpu_ras_inject(enum amdgpu_ras_block block,
650 		uint32_t sub_block, enum amdgpu_ras_error_type type,
651 		uint64_t address, uint64_t value)
652 {
653 	struct ras_debug_if data = { .op = 2, };
654 	struct ras_inject_if *inject = &data.inject;
655 	int ret;
656 
657 	if (amdgpu_ras_is_feature_enabled(block) <= 0) {
658 		fprintf(stderr, "block id(%d) is not valid\n", block);
659 		return -1;
660 	}
661 
662 	inject->head.block = block;
663 	inject->head.type = type;
664 	inject->head.sub_block_index = sub_block;
665 	strncpy(inject->head.name, ras_block_str(block), sizeof(inject->head.name)-1);
666 	inject->address = address;
667 	inject->value = value;
668 
669 	ret = amdgpu_ras_invoke(&data);
670 	CU_ASSERT_EQUAL(ret, 0);
671 	if (ret)
672 		return -1;
673 
674 	return 0;
675 }
676 
677 //tests
amdgpu_ras_features_test(int enable)678 static void amdgpu_ras_features_test(int enable)
679 {
680 	struct ras_debug_if data;
681 	int ret;
682 	int i;
683 
684 	data.op = enable;
685 	for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
686 		struct ras_common_if head = {
687 			.block = i,
688 			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
689 			.sub_block_index = 0,
690 			.name = "",
691 		};
692 
693 		if (amdgpu_ras_is_feature_supported(i) <= 0)
694 			continue;
695 
696 		data.head = head;
697 
698 		ret = amdgpu_ras_invoke(&data);
699 		CU_ASSERT_EQUAL(ret, 0);
700 
701 		if (ret)
702 			continue;
703 
704 		ret = enable ^ amdgpu_ras_is_feature_enabled(i);
705 		CU_ASSERT_EQUAL(ret, 0);
706 	}
707 }
708 
amdgpu_ras_disable_test(void)709 static void amdgpu_ras_disable_test(void)
710 {
711 	int i;
712 	for (i = 0; i < devices_count; i++) {
713 		set_test_card(i);
714 		amdgpu_ras_features_test(0);
715 	}
716 }
717 
amdgpu_ras_enable_test(void)718 static void amdgpu_ras_enable_test(void)
719 {
720 	int i;
721 	for (i = 0; i < devices_count; i++) {
722 		set_test_card(i);
723 		amdgpu_ras_features_test(1);
724 	}
725 }
726 
__amdgpu_ras_ip_inject_test(const struct ras_inject_test_config * ip_test,uint32_t size)727 static void __amdgpu_ras_ip_inject_test(const struct ras_inject_test_config *ip_test,
728 					uint32_t size)
729 {
730 	int i, ret;
731 	unsigned long old_ue, old_ce;
732 	unsigned long ue, ce;
733 	uint32_t block;
734 	int timeout;
735 	bool pass;
736 
737 	for (i = 0; i < size; i++) {
738 		timeout = 3;
739 		pass = false;
740 
741 		block = amdgpu_ras_find_block_id_by_name(ip_test[i].block);
742 
743 		/* Ensure one valid ip block */
744 		if (block == ARRAY_SIZE(ras_block_string))
745 			break;
746 
747 		/* Ensure RAS feature for the IP block is enabled by kernel */
748 		if (amdgpu_ras_is_feature_supported(block) <= 0)
749 			break;
750 
751 		ret = amdgpu_ras_query_err_count(block, &old_ue, &old_ce);
752 		CU_ASSERT_EQUAL(ret, 0);
753 		if (ret)
754 			break;
755 
756 		ret = amdgpu_ras_inject(block,
757 					ip_test[i].sub_block,
758 					ip_test[i].type,
759 					ip_test[i].address,
760 					ip_test[i].value);
761 		CU_ASSERT_EQUAL(ret, 0);
762 		if (ret)
763 			break;
764 
765 		while (timeout > 0) {
766 			sleep(5);
767 
768 			ret = amdgpu_ras_query_err_count(block, &ue, &ce);
769 			CU_ASSERT_EQUAL(ret, 0);
770 			if (ret)
771 				break;
772 
773 			if (old_ue != ue || old_ce != ce) {
774 				pass = true;
775 				sleep(20);
776 				break;
777 			}
778 			timeout -= 1;
779 		}
780 		printf("\t Test %s@block %s, subblock %d, error_type %s, address %ld, value %ld: %s\n",
781 			ip_test[i].name,
782 			ip_test[i].block,
783 			ip_test[i].sub_block,
784 			amdgpu_ras_get_error_type_id(ip_test[i].type),
785 			ip_test[i].address,
786 			ip_test[i].value,
787 			pass ? "Pass" : "Fail");
788 	}
789 }
790 
__amdgpu_ras_inject_test(void)791 static void __amdgpu_ras_inject_test(void)
792 {
793 	printf("...\n");
794 
795 	/* run UMC ras inject test */
796 	__amdgpu_ras_ip_inject_test(umc_ras_inject_test,
797 		ARRAY_SIZE(umc_ras_inject_test));
798 
799 	/* run GFX ras inject test */
800 	__amdgpu_ras_ip_inject_test(gfx_ras_inject_test,
801 		ARRAY_SIZE(gfx_ras_inject_test));
802 }
803 
amdgpu_ras_inject_test(void)804 static void amdgpu_ras_inject_test(void)
805 {
806 	int i;
807 	for (i = 0; i < devices_count; i++) {
808 		set_test_card(i);
809 		__amdgpu_ras_inject_test();
810 	}
811 }
812 
__amdgpu_ras_query_test(void)813 static void __amdgpu_ras_query_test(void)
814 {
815 	unsigned long ue, ce;
816 	int ret;
817 	int i;
818 
819 	for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
820 		if (amdgpu_ras_is_feature_supported(i) <= 0)
821 			continue;
822 
823 		if (!((1 << i) & ras_block_mask_query))
824 			continue;
825 
826 		ret = amdgpu_ras_query_err_count(i, &ue, &ce);
827 		CU_ASSERT_EQUAL(ret, 0);
828 	}
829 }
830 
amdgpu_ras_query_test(void)831 static void amdgpu_ras_query_test(void)
832 {
833 	int i;
834 	for (i = 0; i < devices_count; i++) {
835 		set_test_card(i);
836 		__amdgpu_ras_query_test();
837 	}
838 }
839 
amdgpu_ras_basic_test(void)840 static void amdgpu_ras_basic_test(void)
841 {
842 	int ret;
843 	int i;
844 	int j;
845 	uint32_t features;
846 	char path[PATH_SIZE];
847 
848 	ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY);
849 	CU_ASSERT_EQUAL(ret, 0);
850 
851 	for (i = 0; i < devices_count; i++) {
852 		set_test_card(i);
853 
854 		ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
855 				sizeof(features), &features);
856 		CU_ASSERT_EQUAL(ret, 0);
857 
858 		snprintf(path, sizeof(path), "%s", get_ras_debugfs_root());
859 		strncat(path, "ras_ctrl", sizeof(path) - strlen(path));
860 
861 		ret = is_file_ok(path, O_WRONLY);
862 		CU_ASSERT_EQUAL(ret, 0);
863 
864 		snprintf(path, sizeof(path), "%s", get_ras_sysfs_root());
865 		strncat(path, "features", sizeof(path) - strlen(path));
866 
867 		ret = is_file_ok(path, O_RDONLY);
868 		CU_ASSERT_EQUAL(ret, 0);
869 
870 		for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) {
871 			ret = amdgpu_ras_is_feature_supported(j);
872 			if (ret <= 0)
873 				continue;
874 
875 			if (!((1 << j) & ras_block_mask_basic))
876 				continue;
877 
878 			snprintf(path, sizeof(path), "%s", get_ras_sysfs_root());
879 			strncat(path, ras_block_str(j), sizeof(path) -  strlen(path));
880 			strncat(path, "_err_count", sizeof(path) - strlen(path));
881 
882 			ret = is_file_ok(path, O_RDONLY);
883 			CU_ASSERT_EQUAL(ret, 0);
884 
885 			snprintf(path, sizeof(path), "%s", get_ras_debugfs_root());
886 			strncat(path, ras_block_str(j), sizeof(path) - strlen(path));
887 			strncat(path, "_err_inject", sizeof(path) - strlen(path));
888 
889 			ret = is_file_ok(path, O_WRONLY);
890 			CU_ASSERT_EQUAL(ret, 0);
891 		}
892 	}
893 }
894 
895 CU_TestInfo ras_tests[] = {
896 	{ "ras basic test",	amdgpu_ras_basic_test },
897 	{ "ras query test",	amdgpu_ras_query_test },
898 	{ "ras inject test",	amdgpu_ras_inject_test },
899 	{ "ras disable test",	amdgpu_ras_disable_test },
900 	{ "ras enable test",	amdgpu_ras_enable_test },
901 	CU_TEST_INFO_NULL,
902 };
903 
suite_ras_tests_enable(void)904 CU_BOOL suite_ras_tests_enable(void)
905 {
906 	amdgpu_device_handle device_handle;
907 	uint32_t  major_version;
908 	uint32_t  minor_version;
909 	int i;
910 	drmDevicePtr device;
911 
912 	for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
913 		if (amdgpu_device_initialize(drm_amdgpu[i], &major_version,
914 					&minor_version, &device_handle))
915 			continue;
916 
917 		if (drmGetDevice2(drm_amdgpu[i],
918 					DRM_DEVICE_GET_PCI_REVISION,
919 					&device))
920 			continue;
921 
922 		if (device->bustype == DRM_BUS_PCI &&
923 				amdgpu_ras_lookup_capability(device_handle)) {
924 			amdgpu_device_deinitialize(device_handle);
925 			return CU_TRUE;
926 		}
927 
928 		if (amdgpu_device_deinitialize(device_handle))
929 			continue;
930 	}
931 
932 	return CU_FALSE;
933 }
934 
suite_ras_tests_init(void)935 int suite_ras_tests_init(void)
936 {
937 	drmDevicePtr device;
938 	amdgpu_device_handle device_handle;
939 	uint32_t  major_version;
940 	uint32_t  minor_version;
941 	uint32_t  capability;
942 	struct ras_test_mask test_mask;
943 	int id;
944 	int i;
945 	int r;
946 
947 	for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
948 		r = amdgpu_device_initialize(drm_amdgpu[i], &major_version,
949 				&minor_version, &device_handle);
950 		if (r)
951 			continue;
952 
953 		if (drmGetDevice2(drm_amdgpu[i],
954 					DRM_DEVICE_GET_PCI_REVISION,
955 					&device)) {
956 			amdgpu_device_deinitialize(device_handle);
957 			continue;
958 		}
959 
960 		if (device->bustype != DRM_BUS_PCI) {
961 			amdgpu_device_deinitialize(device_handle);
962 			continue;
963 		}
964 
965 		capability = amdgpu_ras_lookup_capability(device_handle);
966 		if (capability == 0) {
967 			amdgpu_device_deinitialize(device_handle);
968 			continue;
969 
970 		}
971 
972 		id = amdgpu_ras_lookup_id(device);
973 		if (id == -1) {
974 			amdgpu_device_deinitialize(device_handle);
975 			continue;
976 		}
977 
978 		test_mask = amdgpu_ras_get_test_mask(device);
979 
980 		devices[devices_count++] = (struct amdgpu_ras_data) {
981 			device_handle, id, capability, test_mask,
982 		};
983 	}
984 
985 	if (devices_count == 0)
986 		return CUE_SINIT_FAILED;
987 
988 	return CUE_SUCCESS;
989 }
990 
suite_ras_tests_clean(void)991 int suite_ras_tests_clean(void)
992 {
993 	int r;
994 	int i;
995 	int ret = CUE_SUCCESS;
996 
997 	for (i = 0; i < devices_count; i++) {
998 		r = amdgpu_device_deinitialize(devices[i].device_handle);
999 		if (r)
1000 			ret = CUE_SCLEAN_FAILED;
1001 	}
1002 	return ret;
1003 }
1004