1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 /*
6  * The purpose of this test is to exercise the GPU failure path.
7  * We craft an erroneous GPU command packet and send it to the GPU,
8  * and wait for a udev event notifying us of a GPU hang.
9  * If the event doesn't come back, the test fails.
10  *
11  * This test must run with ui stopped.
12  */
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 
/**
 * Print an informational message on stdout as "INFO: <msg>".
 *
 * All open output streams are flushed afterwards so the line does not
 * linger in a stdio buffer.
 *
 * @param msg  NUL-terminated text to print; it is only read, never modified.
 */
void OUTPUT_INFO(const char *msg) {
	printf("INFO: %s\n", msg);
	fflush(NULL);
}
/**
 * Print a warning message on stdout as "WARNING: <msg>".
 *
 * All open output streams are flushed afterwards so the line does not
 * linger in a stdio buffer.
 *
 * @param msg  NUL-terminated text to print; it is only read, never modified.
 */
void OUTPUT_WARNING(const char *msg) {
	printf("WARNING: %s\n", msg);
	fflush(NULL);
}
/**
 * Print an error message on stdout as "ERROR: <msg>".
 *
 * All open output streams are flushed afterwards so the line does not
 * linger in a stdio buffer.
 *
 * @param msg  NUL-terminated text to print; it is only read, never modified.
 */
void OUTPUT_ERROR(const char *msg) {
	printf("ERROR: %s\n", msg);
	fflush(NULL);
}
/**
 * Print the "[ RUN      ] graphics_GpuReset" banner on stdout and flush
 * all open output streams.
 */
void OUTPUT_RUN(void) {
	printf("[ RUN      ] graphics_GpuReset\n");
	fflush(NULL);
}
/**
 * Terminate the process with the given status after a 10 second pause.
 *
 * The pause is not strictly required. It spaces out back-to-back runs of
 * the test so that the kernel does not conclude the GPU is toast.
 *
 * @param code  Status handed to exit().
 */
void EXIT(int code)
{
	const unsigned int pause_seconds = 10;

	OUTPUT_INFO("sleep(10) to prevent the kernel from thinking the GPU is completely locked.");
	sleep(pause_seconds);
	exit(code);
}
/**
 * Print the "[       OK ] graphics_GpuReset" line on stdout, flush all open
 * output streams, and leave through EXIT() with status 0.
 */
void OUTPUT_PASS_AND_EXIT(void) {
	printf("[       OK ] graphics_GpuReset\n");
	fflush(NULL);
	EXIT(0);
}
/**
 * Print the "[  FAILED  ] graphics_GpuReset <msg>" line on stdout, flush
 * all open output streams, and leave through EXIT() with status -1.
 *
 * @param msg  NUL-terminated failure reason appended to the line; it is
 *             only read, never modified.
 */
void OUTPUT_FAIL_AND_EXIT(const char *msg) {
	printf("[  FAILED  ] graphics_GpuReset %s\n", msg);
	fflush(NULL);
	EXIT(-1);
}
51 
52 #if !defined(__INTEL_GPU__)
53 
54 #pragma message "Compiling for GPU other than Intel."
55 
/**
 * Entry point used when the file is built without __INTEL_GPU__.
 *
 * Nothing is exercised in this build: the run banner and a warning are
 * printed, and the test is reported as passed.
 */
int main(int argc, char **argv)
{
	// The command line is not consulted in this build.
	(void)argc;
	(void)argv;

	OUTPUT_RUN();
	OUTPUT_WARNING("The gpureset test is defined for some Intel GPUs only.");
	OUTPUT_PASS_AND_EXIT();

	// Only reached if OUTPUT_PASS_AND_EXIT() ever returns.
	return 0;
}
63 
64 #else
65 
66 #pragma message "Compiling for Intel GPU."
67 
68 #include <assert.h>
69 #include <errno.h>
70 #include <fcntl.h>
71 #include <fnmatch.h>
72 #define LIBUDEV_I_KNOW_THE_API_IS_SUBJECT_TO_CHANGE
73 #include <libudev.h>
74 #include <stdbool.h>
75 #include <string.h>
76 #include <sys/ioctl.h>
77 #include <sys/types.h>
78 #include <sys/select.h>
79 #include <sys/stat.h>
80 
81 #include "xf86drm.h"
82 #include "i915_drm.h"
83 #include "intel_bufmgr.h"
84 
85 #define DRM_TEST_MASTER 0x01
86 
87 
is_master(int fd)88 static int is_master(int fd)
89 {
90 	drm_client_t client;
91 	int ret;
92 
93 	/* Check that we're the only opener and authed. */
94 	client.idx = 0;
95 	ret = ioctl(fd, DRM_IOCTL_GET_CLIENT, &client);
96 	assert (ret == 0);
97 	if (!client.auth)
98 		return 0;
99 	client.idx = 1;
100 	ret = ioctl(fd, DRM_IOCTL_GET_CLIENT, &client);
101 	if (ret != -1 || errno != EINVAL)
102 		return 0;
103 
104 	return 1;
105 }
106 
107 /** Open the first DRM device matching the criteria. */
drm_open_matching(const char * pci_glob,int flags)108 int drm_open_matching(const char *pci_glob, int flags)
109 {
110 	struct udev *udev;
111 	struct udev_enumerate *e;
112 	struct udev_device *device, *parent;
113         struct udev_list_entry *entry;
114 	const char *pci_id, *path;
115 	const char *usub, *dnode;
116 	int fd;
117 
118 	udev = udev_new();
119 	if (udev == NULL)
120 		return -1;
121 
122 	fd = -1;
123 	e = udev_enumerate_new(udev);
124 	udev_enumerate_add_match_subsystem(e, "drm");
125         udev_enumerate_scan_devices(e);
126         udev_list_entry_foreach(entry, udev_enumerate_get_list_entry(e)) {
127 		path = udev_list_entry_get_name(entry);
128 		device = udev_device_new_from_syspath(udev, path);
129 		parent = udev_device_get_parent(device);
130 		usub = udev_device_get_subsystem(parent);
131 		/* Filter out KMS output devices. */
132 		if (!usub || (strcmp(usub, "pci") != 0))
133 			continue;
134 		pci_id = udev_device_get_property_value(parent, "PCI_ID");
135 		if (fnmatch(pci_glob, pci_id, 0) != 0)
136 			continue;
137 		dnode = udev_device_get_devnode(device);
138 		if (strstr(dnode, "control"))
139 			continue;
140 		fd = open(dnode, O_RDWR);
141 		if (fd < 0)
142 			continue;
143 		if ((flags & DRM_TEST_MASTER) && !is_master(fd)) {
144 			close(fd);
145 			fd = -1;
146 			continue;
147 		}
148 
149 		break;
150 	}
151         udev_enumerate_unref(e);
152 	udev_unref(udev);
153 
154 	return fd;
155 }
156 
/**
 * Create a udev monitor that listens for events on the "drm" subsystem.
 *
 * The monitor is created on the "udev" netlink source, filtered to the
 * "drm" subsystem (any devtype), and switched to receiving mode.
 *
 * On success the udev context created here is left referenced: only the
 * monitor is handed back, so the context can still be reached through it.
 *
 * @return The ready-to-use monitor, or NULL if any step failed (an ERROR
 *         line is printed in that case).
 */
struct udev_monitor* udev_init(void)
{
	const char *subsystem = "drm";
	struct udev *udev;
	struct udev_monitor *monitor;

	// Create the udev object.
	udev = udev_new();
	if (!udev) {
		OUTPUT_ERROR("Can't get create udev object.");
		return NULL;
	}

	// Create the udev monitor structure.
	monitor = udev_monitor_new_from_netlink(udev, "udev");
	if (!monitor) {
		OUTPUT_ERROR("Can't get create udev monitor");
		udev_unref(udev);
		return NULL;
	}

	// A monitor that is not filtered or not receiving would only show up
	// later as a misleading timeout, so fail here instead.
	if (udev_monitor_filter_add_match_subsystem_devtype(monitor,
			subsystem,
			NULL) < 0) {
		OUTPUT_ERROR("Can't add subsystem filter to udev monitor.");
		goto fail;
	}
	if (udev_monitor_enable_receiving(monitor) < 0) {
		OUTPUT_ERROR("Can't enable receiving on udev monitor.");
		goto fail;
	}

	return monitor;

fail:
	udev_monitor_unref(monitor);
	udev_unref(udev);
	return NULL;
}
183 
/**
 * Wait up to 20 seconds for one event on the given udev monitor.
 *
 * The first event received is logged on stdout and counts as success; its
 * content is not inspected beyond being printed.
 *
 * @param monitor  Monitor that is already receiving events.
 * @return 1 if an event was received, 0 on timeout or on any error (an
 *         ERROR line is printed in those cases).
 */
int udev_wait(struct udev_monitor* monitor)
{
	fd_set fds;
	struct timeval tv;
	struct udev_device *dev;
	const char *devnode, *subsystem, *devtype, *action;
	int ret;

	int fd = udev_monitor_get_fd(monitor);

	// FD_SET() is only defined for descriptors in [0, FD_SETSIZE).
	if (fd < 0 || fd >= FD_SETSIZE) {
		OUTPUT_ERROR("Can't get a usable udev monitor fd.");
		return 0;
	}

	FD_ZERO(&fds);
	FD_SET(fd, &fds);

	// Wait for at most 20 seconds for the event to come back.
	tv.tv_sec = 20;
	tv.tv_usec = 0;

	ret = select(fd + 1, &fds, NULL, NULL, &tv);

	// Keep a failing select() apart from a real timeout.
	if (ret < 0) {
		OUTPUT_ERROR("select() on udev monitor failed.");
		return 0;
	}
	if (ret == 0) {
		OUTPUT_ERROR("Timed out waiting for udev event to come back.");
		return 0;
	}

	dev = udev_monitor_receive_device(monitor);
	if (!dev) {
		OUTPUT_ERROR("Can't get receive_device().");
		return 0;
	}

	// Any of these may be absent; never hand a NULL pointer to %s.
	devnode = udev_device_get_devnode(dev);
	subsystem = udev_device_get_subsystem(dev);
	devtype = udev_device_get_devtype(dev);
	action = udev_device_get_action(dev);

	// TODO(ihf): variable args to INFO function.
	printf("INFO: Event on (%s|%s|%s) Action %s\n",
			devnode ? devnode : "(null)",
			subsystem ? subsystem : "(null)",
			devtype ? devtype : "(null)",
			action ? action : "(null)");
	fflush(NULL);
	udev_device_unref(dev);
	return 1;
}
222 
main(int argc,char ** argv)223 int main(int argc, char **argv)
224 {
225 	int fd;
226 	int ret;
227 	drmVersionPtr v;
228 
229 	OUTPUT_RUN();
230 	OUTPUT_INFO("The GPU reset test *must* be run with 'stop ui'.");
231 	OUTPUT_INFO("Otherwise following tests will likely hang/crash the machine.");
232 	OUTPUT_INFO("sleep(10) to make sure UI has time to stop.");
233 	sleep(10);
234 
235 	fd = drm_open_matching("*:*", 0);
236 
237 	if (fd < 0) {
238 		OUTPUT_FAIL_AND_EXIT("Failed to open any drm device.");
239 	}
240 
241 	v = drmGetVersion(fd);
242 	assert(strlen(v->name) != 0);
243 	if (strcmp(v->name, "i915") == 0) {
244 		assert(v->version_major >= 1);
245 	} else {
246 		OUTPUT_WARNING("Can't find Intel GPU.");
247 		OUTPUT_PASS_AND_EXIT();
248 	}
249 
250 	unsigned int pci_id;
251 	struct drm_i915_getparam gp;
252 	gp.param = I915_PARAM_CHIPSET_ID;
253 	gp.value = (int*)&pci_id;
254 	ret = ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp));
255 
256 	if (ret) {
257 		OUTPUT_FAIL_AND_EXIT("Can't get the i915 pci_id.");
258 	}
259 
260 	// TODO(ihf): variable args to INFO function.
261 	printf("INFO: i915 pci_id=0x%x.\n", pci_id);
262 	switch(pci_id) {
263 		// sandy bridge
264 		case 0x102:
265                 case 0x106: // Butterfly, Lumpy.
266 		case 0x116:
267 		case 0x126:
268 		// ivy bridge
269                 case 0x156: // Stout.
270                 case 0x166: // Link.
271                 // haswell
272                 case 0xa06: // GT1, Peppy, Falco.
273                 case 0xa16: // GT2.
274                 case 0xa26: // GT3.
275 			break;
276 		default:
277 		{
278 			OUTPUT_WARNING("Intel GPU detected, but model doesn't support reset.");
279 			OUTPUT_PASS_AND_EXIT();
280 		}
281 	}
282 
283 	struct udev_monitor* monitor = udev_init();
284 	if (!monitor) {
285 		OUTPUT_FAIL_AND_EXIT("udev init failed.");
286 	}
287 
288 	drm_intel_bufmgr* bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
289 
290 	drm_intel_bo* bo;
291 	bo = drm_intel_bo_alloc(bufmgr, "bogus cmdbuffer", 4096, 4096);
292 
293 	uint32_t invalid_buf[8] =
294 	{
295 		0x00000000, // NOOP
296 		0xd00dd00d, // invalid command
297 		0x00000000, // NOOP
298 		0x00000000, // NOOP
299 		0x05000000, // BATCHBUFFER_END
300 		0x05000000, // BATCHBUFFER_END
301 		0x05000000, // BATCHBUFFER_END
302 		0x05000000, // BATCHBUFFER_END
303 	};
304 
305 	// Copy our invalid cmd buffer into the bo.
306 	ret = drm_intel_bo_subdata(bo, 0, sizeof(invalid_buf), invalid_buf);
307 	if (ret != 0) {
308 		OUTPUT_FAIL_AND_EXIT("bo_subdata failed.");
309 	}
310 
311 	// Submit our invalid buffer.
312 	ret = drm_intel_bo_exec(bo, sizeof(invalid_buf), NULL, 0, 0);
313 	if (ret != 0) {
314 		OUTPUT_FAIL_AND_EXIT("bo_exec failed.");
315 	}
316 	OUTPUT_INFO("Sent bogus buffer, waiting for event.");
317 	// Submit our invalid buffer.
318 	drm_intel_bo_wait_rendering(bo);
319 
320 	int res = udev_wait(monitor);
321 
322 	drmFree(v);
323 	close(fd);
324 
325 	if (res) {
326 		OUTPUT_PASS_AND_EXIT();
327 	}
328 	else {
329 		OUTPUT_FAIL_AND_EXIT("GPU reset event did not come back.");
330 	}
331 
332 	return 0;
333 }
334 
#endif // !defined(__INTEL_GPU__)
336