1// Copyright 2015 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package host
5
6import (
7	"bytes"
8	"fmt"
9	"io/ioutil"
10	"os"
11	"os/exec"
12	"runtime"
13	"strconv"
14	"strings"
15	"sync"
16	"syscall"
17	"time"
18	"unsafe"
19
20	"github.com/google/syzkaller/pkg/osutil"
21	"github.com/google/syzkaller/prog"
22	"github.com/google/syzkaller/sys/linux"
23)
24
25func isSupported(c *prog.Syscall, sandbox string) (bool, string) {
26	if strings.HasPrefix(c.CallName, "syz_") {
27		return isSupportedSyzkall(sandbox, c)
28	}
29	if strings.HasPrefix(c.Name, "socket$") ||
30		strings.HasPrefix(c.Name, "socketpair$") {
31		return isSupportedSocket(c)
32	}
33	if strings.HasPrefix(c.Name, "openat$") {
34		return isSupportedOpenAt(c)
35	}
36	if strings.HasPrefix(c.Name, "mount$") {
37		return isSupportedMount(c, sandbox)
38	}
39	// There are 3 possible strategies for detecting supported syscalls:
40	// 1. Executes all syscalls with presumably invalid arguments and check for ENOprog.
41	//    But not all syscalls are safe to execute. For example, pause will hang,
42	//    while setpgrp will push the process into own process group.
43	// 2. Check presence of /sys/kernel/debug/tracing/events/syscalls/sys_enter_* files.
44	//    This requires root and CONFIG_FTRACE_SYSCALLS. Also it lies for some syscalls.
45	//    For example, on x86_64 it says that sendfile is not present (only sendfile64).
46	// 3. Check sys_syscallname in /proc/kallsyms.
47	//    Requires CONFIG_KALLSYMS.
48	// Kallsyms seems to be the most reliable and fast. That's what we use first.
49	// If kallsyms is not present, we fallback to execution of syscalls.
50	kallsymsOnce.Do(func() {
51		kallsyms, _ = ioutil.ReadFile("/proc/kallsyms")
52	})
53	if !testFallback && len(kallsyms) != 0 {
54		return isSupportedKallsyms(c)
55	}
56	return isSupportedTrial(c)
57}
58
59func isSupportedKallsyms(c *prog.Syscall) (bool, string) {
60	name := c.CallName
61	if newname := kallsymsMap[name]; newname != "" {
62		name = newname
63	}
64	if !bytes.Contains(kallsyms, []byte(" T sys_"+name+"\n")) &&
65		!bytes.Contains(kallsyms, []byte(" T ksys_"+name+"\n")) &&
66		!bytes.Contains(kallsyms, []byte(" T __ia32_sys_"+name+"\n")) &&
67		!bytes.Contains(kallsyms, []byte(" T __x64_sys_"+name+"\n")) {
68		return false, fmt.Sprintf("sys_%v is not present in /proc/kallsyms", name)
69	}
70	return true, ""
71}
72
73func isSupportedTrial(c *prog.Syscall) (bool, string) {
74	switch c.CallName {
75	// These known to cause hangs.
76	case "exit", "pause":
77		return true, ""
78	}
79	trialMu.Lock()
80	defer trialMu.Unlock()
81	if res, ok := trialSupported[c.NR]; ok {
82		return res, "ENOSYS"
83	}
84	cmd := osutil.Command(os.Args[0])
85	cmd.Env = []string{fmt.Sprintf("SYZ_TRIAL_TEST=%v", c.NR)}
86	_, err := osutil.Run(10*time.Second, cmd)
87	res := err != nil
88	trialSupported[c.NR] = res
89	return res, "ENOSYS"
90}
91
// init implements the child side of isSupportedTrial. When SYZ_TRIAL_TEST is
// set, it holds a syscall number: the syscall is invoked with bogus arguments
// and the process exits with status 0 if the result was ENOSYS and 1 otherwise.
// When the variable is unset this function does nothing.
func init() {
	val := os.Getenv("SYZ_TRIAL_TEST")
	if val == "" {
		return
	}
	nr, err := strconv.Atoi(val)
	if err != nil {
		panic(err)
	}
	// Something as invalid as possible.
	arg := ^uintptr(0) - 1e4
	_, _, errno := syscall.Syscall6(uintptr(nr), arg, arg, arg, arg, arg, arg)
	status := 1
	if errno == syscall.ENOSYS {
		status = 0
	}
	os.Exit(status)
}
108
// Some syscall names diverge in __NR_* consts and kallsyms.
// umount2 is renamed to umount in arch/x86/entry/syscalls/syscall_64.tbl.
// Where umount is renamed to oldumount is unclear.
var (
	// kallsyms holds the contents of /proc/kallsyms. It is filled at most once
	// under kallsymsOnce and stays empty if the file could not be read.
	kallsyms     []byte
	kallsymsOnce sync.Once
	// kallsymsMap translates a syscall name to the name used for its symbol
	// in kallsyms, for the cases where the two differ.
	kallsymsMap  = map[string]string{
		"umount":  "oldumount",
		"umount2": "umount",
	}
	// trialMu guards trialSupported, which caches isSupportedTrial results
	// keyed by syscall number.
	trialMu         sync.Mutex
	trialSupported  = make(map[uint64]bool)
	// filesystems holds the contents of /proc/filesystems. It is filled at most
	// once under filesystemsOnce and stays empty if the file could not be read.
	filesystems     []byte
	filesystemsOnce sync.Once
)
124
125// The function is lengthy as it handles all pseudo-syscalls,
126// but it does not seem to cause comprehension problems as there is no shared state.
127// Splitting this per-syscall will only increase code size.
128// nolint: gocyclo
129func isSupportedSyzkall(sandbox string, c *prog.Syscall) (bool, string) {
130	switch c.CallName {
131	case "syz_open_dev":
132		if _, ok := c.Args[0].(*prog.ConstType); ok {
133			// This is for syz_open_dev$char/block.
134			// They are currently commented out, but in case one enables them.
135			return true, ""
136		}
137		fname, ok := extractStringConst(c.Args[0])
138		if !ok {
139			panic("first open arg is not a pointer to string const")
140		}
141		var check func(dev string) bool
142		check = func(dev string) bool {
143			if !strings.Contains(dev, "#") {
144				// Note: don't try to open them all, some can hang (e.g. /dev/snd/pcmC#D#p).
145				return osutil.IsExist(dev)
146			}
147			for i := 0; i < 10; i++ {
148				if check(strings.Replace(dev, "#", strconv.Itoa(i), 1)) {
149					return true
150				}
151			}
152			return false
153		}
154		if !check(fname) {
155			return false, fmt.Sprintf("file %v does not exist", fname)
156		}
157		return onlySandboxNoneOrNamespace(sandbox)
158	case "syz_open_procfs":
159		return true, ""
160	case "syz_open_pts":
161		return true, ""
162	case "syz_emit_ethernet", "syz_extract_tcp_res":
163		reason := checkNetworkInjection()
164		return reason == "", reason
165	case "syz_kvm_setup_cpu":
166		switch c.Name {
167		case "syz_kvm_setup_cpu$x86":
168			if runtime.GOARCH == "amd64" || runtime.GOARCH == "386" {
169				return true, ""
170			}
171		case "syz_kvm_setup_cpu$arm64":
172			if runtime.GOARCH == "arm64" {
173				return true, ""
174			}
175		}
176		return false, "unsupported arch"
177	case "syz_init_net_socket":
178		// Unfortunately this only works with sandbox none at the moment.
179		// The problem is that setns of a network namespace requires CAP_SYS_ADMIN
180		// in the target namespace, and we've lost all privs in the init namespace
181		// during creation of a user namespace.
182		if ok, reason := onlySandboxNone(sandbox); !ok {
183			return false, reason
184		}
185		return isSupportedSocket(c)
186	case "syz_genetlink_get_family_id":
187		fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_GENERIC)
188		if fd == -1 {
189			return false, fmt.Sprintf("socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) failed: %v", err)
190		}
191		syscall.Close(fd)
192		return true, ""
193	case "syz_mount_image":
194		if ok, reason := onlySandboxNone(sandbox); !ok {
195			return ok, reason
196		}
197		fstype, ok := extractStringConst(c.Args[0])
198		if !ok {
199			panic("syz_mount_image arg is not string")
200		}
201		return isSupportedFilesystem(fstype)
202	case "syz_read_part_table":
203		return onlySandboxNone(sandbox)
204	}
205	panic("unknown syzkall: " + c.Name)
206}
207
// onlySandboxNone allows a feature only when the process runs as root (uid 0)
// and the sandbox is "none"; otherwise it returns false with a fixed reason.
func onlySandboxNone(sandbox string) (bool, string) {
	if sandbox == "none" && syscall.Getuid() == 0 {
		return true, ""
	}
	return false, "only supported under root with sandbox=none"
}
214
// onlySandboxNoneOrNamespace allows a feature only when the process runs as
// root (uid 0) and the sandbox is anything other than "setuid"; otherwise it
// returns false with a fixed reason.
func onlySandboxNoneOrNamespace(sandbox string) (bool, string) {
	if sandbox != "setuid" && syscall.Getuid() == 0 {
		return true, ""
	}
	return false, "only supported under root with sandbox=none/namespace"
}
221
222func isSupportedSocket(c *prog.Syscall) (bool, string) {
223	af, ok := c.Args[0].(*prog.ConstType)
224	if !ok {
225		panic("socket family is not const")
226	}
227	fd, err := syscall.Socket(int(af.Val), 0, 0)
228	if fd != -1 {
229		syscall.Close(fd)
230	}
231	if err == syscall.ENOSYS {
232		return false, "socket syscall returns ENOSYS"
233	}
234	if err == syscall.EAFNOSUPPORT {
235		return false, "socket family is not supported (EAFNOSUPPORT)"
236	}
237	proto, ok := c.Args[2].(*prog.ConstType)
238	if !ok {
239		return true, ""
240	}
241	var typ uint64
242	if arg, ok := c.Args[1].(*prog.ConstType); ok {
243		typ = arg.Val
244	} else if arg, ok := c.Args[1].(*prog.FlagsType); ok {
245		typ = arg.Vals[0]
246	} else {
247		return true, ""
248	}
249	fd, err = syscall.Socket(int(af.Val), int(typ), int(proto.Val))
250	if fd != -1 {
251		syscall.Close(fd)
252		return true, ""
253	}
254	return false, err.Error()
255}
256
257func isSupportedOpenAt(c *prog.Syscall) (bool, string) {
258	fname, ok := extractStringConst(c.Args[1])
259	if !ok || len(fname) == 0 || fname[0] != '/' {
260		return true, ""
261	}
262	fd, err := syscall.Open(fname, syscall.O_RDONLY, 0)
263	if fd != -1 {
264		syscall.Close(fd)
265	}
266	if err != nil {
267		return false, fmt.Sprintf("open(%v) failed: %v", fname, err)
268	}
269	return true, ""
270}
271
272func isSupportedMount(c *prog.Syscall, sandbox string) (bool, string) {
273	fstype, ok := extractStringConst(c.Args[2])
274	if !ok {
275		panic(fmt.Sprintf("%v: filesystem is not string const", c.Name))
276	}
277	if ok, reason := isSupportedFilesystem(fstype); !ok {
278		return ok, reason
279	}
280	switch fstype {
281	case "fuse", "fuseblk":
282		if err := osutil.IsAccessible("/dev/fuse"); err != nil {
283			return false, err.Error()
284		}
285		return onlySandboxNoneOrNamespace(sandbox)
286	default:
287		return onlySandboxNone(sandbox)
288	}
289}
290
291func isSupportedFilesystem(fstype string) (bool, string) {
292	filesystemsOnce.Do(func() {
293		filesystems, _ = ioutil.ReadFile("/proc/filesystems")
294	})
295	if !bytes.Contains(filesystems, []byte("\t"+fstype+"\n")) {
296		return false, fmt.Sprintf("/proc/filesystems does not contain %v", fstype)
297	}
298	return true, ""
299}
300
301func extractStringConst(typ prog.Type) (string, bool) {
302	ptr, ok := typ.(*prog.PtrType)
303	if !ok {
304		panic("first open arg is not a pointer to string const")
305	}
306	str, ok := ptr.Type.(*prog.BufferType)
307	if !ok || str.Kind != prog.BufferString || len(str.Values) == 0 {
308		return "", false
309	}
310	v := str.Values[0]
311	for len(v) != 0 && v[len(v)-1] == 0 {
312		v = v[:len(v)-1] // string terminating \x00
313	}
314	return v, true
315}
316
317func init() {
318	checkFeature[FeatureCoverage] = checkCoverage
319	checkFeature[FeatureComparisons] = checkComparisons
320	checkFeature[FeatureSandboxSetuid] = unconditionallyEnabled
321	checkFeature[FeatureSandboxNamespace] = checkSandboxNamespace
322	checkFeature[FeatureFaultInjection] = checkFaultInjection
323	setupFeature[FeatureFaultInjection] = setupFaultInjection
324	checkFeature[FeatureLeakChecking] = checkLeakChecking
325	setupFeature[FeatureLeakChecking] = setupLeakChecking
326	callbFeature[FeatureLeakChecking] = callbackLeakChecking
327	checkFeature[FeatureNetworkInjection] = checkNetworkInjection
328	checkFeature[FeatureNetworkDevices] = checkNetworkDevices
329}
330
331func checkCoverage() string {
332	if reason := checkDebugFS(); reason != "" {
333		return reason
334	}
335	if !osutil.IsExist("/sys/kernel/debug/kcov") {
336		return "CONFIG_KCOV is not enabled"
337	}
338	if err := osutil.IsAccessible("/sys/kernel/debug/kcov"); err != nil {
339		return err.Error()
340	}
341	return ""
342}
343
// checkComparisons probes whether KCOV comparison tracing can be enabled.
// It opens the kcov debugfs file, initializes and maps a trace buffer, and
// tries to enable KCOV_TRACE_CMP mode. It returns an empty string on success
// and a description of the first failing step otherwise.
//
// The result is a named value so that the deferred cleanup steps (which run in
// reverse order: KCOV_DISABLE, munmap, close) can turn a cleanup failure into
// the returned reason.
func checkComparisons() (reason string) {
	if reason = checkDebugFS(); reason != "" {
		return reason
	}
	// TODO(dvyukov): this should run under target arch.
	// E.g. KCOV ioctls were initially not supported on 386 (missing compat_ioctl),
	// and a 386 executor won't be able to use them, but an amd64 fuzzer will be.
	fd, err := syscall.Open("/sys/kernel/debug/kcov", syscall.O_RDWR, 0)
	if err != nil {
		return "CONFIG_KCOV is not enabled"
	}
	defer syscall.Close(fd)
	// Trigger host target lazy initialization, it will fill linux.KCOV_INIT_TRACE.
	// It's all wrong and needs to be refactored.
	if _, err := prog.GetTarget(runtime.GOOS, runtime.GOARCH); err != nil {
		return fmt.Sprintf("failed to get target: %v", err)
	}
	// Size of the trace buffer passed to KCOV_INIT_TRACE; the mapping below is
	// coverSize pointer-sized words long.
	coverSize := uintptr(64 << 10)
	_, _, errno := syscall.Syscall(
		syscall.SYS_IOCTL, uintptr(fd), linux.KCOV_INIT_TRACE, coverSize)
	if errno != 0 {
		return fmt.Sprintf("ioctl(KCOV_INIT_TRACE) failed: %v", errno)
	}
	mem, err := syscall.Mmap(fd, 0, int(coverSize*unsafe.Sizeof(uintptr(0))),
		syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
	if err != nil {
		return fmt.Sprintf("KCOV mmap failed: %v", err)
	}
	defer func() {
		if err := syscall.Munmap(mem); err != nil {
			reason = fmt.Sprintf("munmap failed: %v", err)
		}
	}()
	// The actual probe: ask the kernel to start tracing in comparison mode.
	_, _, errno = syscall.Syscall(syscall.SYS_IOCTL,
		uintptr(fd), linux.KCOV_ENABLE, linux.KCOV_TRACE_CMP)
	if errno != 0 {
		if errno == 524 { // ENOTSUPP
			return "CONFIG_KCOV_ENABLE_COMPARISONS is not enabled"
		}
		return fmt.Sprintf("ioctl(KCOV_TRACE_CMP) failed: %v", errno)
	}
	// Tracing was enabled successfully; make sure it is switched off again.
	defer func() {
		_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.KCOV_DISABLE, 0)
		if errno != 0 {
			reason = fmt.Sprintf("ioctl(KCOV_DISABLE) failed: %v", errno)
		}
	}()
	return ""
}
393
394func checkFaultInjection() string {
395	if err := osutil.IsAccessible("/proc/self/make-it-fail"); err != nil {
396		return "CONFIG_FAULT_INJECTION is not enabled"
397	}
398	if err := osutil.IsAccessible("/proc/thread-self/fail-nth"); err != nil {
399		return "kernel does not have systematic fault injection support"
400	}
401	if reason := checkDebugFS(); reason != "" {
402		return reason
403	}
404	if err := osutil.IsAccessible("/sys/kernel/debug/failslab/ignore-gfp-wait"); err != nil {
405		return "CONFIG_FAULT_INJECTION_DEBUG_FS is not enabled"
406	}
407	return ""
408}
409
410func setupFaultInjection() error {
411	if err := osutil.WriteFile("/sys/kernel/debug/failslab/ignore-gfp-wait", []byte("N")); err != nil {
412		return fmt.Errorf("failed to write /failslab/ignore-gfp-wait: %v", err)
413	}
414	if err := osutil.WriteFile("/sys/kernel/debug/fail_futex/ignore-private", []byte("N")); err != nil {
415		return fmt.Errorf("failed to write /fail_futex/ignore-private: %v", err)
416	}
417	if err := osutil.WriteFile("/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", []byte("N")); err != nil {
418		return fmt.Errorf("failed to write /fail_page_alloc/ignore-gfp-highmem: %v", err)
419	}
420	if err := osutil.WriteFile("/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", []byte("N")); err != nil {
421		return fmt.Errorf("failed to write /fail_page_alloc/ignore-gfp-wait: %v", err)
422	}
423	if err := osutil.WriteFile("/sys/kernel/debug/fail_page_alloc/min-order", []byte("0")); err != nil {
424		return fmt.Errorf("failed to write /fail_page_alloc/min-order: %v", err)
425	}
426	return nil
427}
428
429func checkLeakChecking() string {
430	if reason := checkDebugFS(); reason != "" {
431		return reason
432	}
433	if err := osutil.IsAccessible("/sys/kernel/debug/kmemleak"); err != nil {
434		return "CONFIG_DEBUG_KMEMLEAK is not enabled"
435	}
436	return ""
437}
438
// setupLeakChecking prepares kmemleak for use: it turns off automatic
// scanning, then scans twice (5 seconds apart) and clears the results so that
// leaks from boot are not reported later.
func setupLeakChecking() error {
	fd, err := syscall.Open("/sys/kernel/debug/kmemleak", syscall.O_RDWR, 0)
	if err != nil {
		return fmt.Errorf("failed to open /sys/kernel/debug/kmemleak: %v", err)
	}
	defer syscall.Close(fd)
	// send writes a single command to the kmemleak control file.
	send := func(cmd string) error {
		_, err := syscall.Write(fd, []byte(cmd))
		return err
	}
	// kmemleak returns EBUSY when kmemleak is already turned off.
	if err := send("scan=off"); err != nil && err != syscall.EBUSY {
		return fmt.Errorf("write(kmemleak, scan=off) failed: %v", err)
	}
	// Flush boot leaks.
	if err := send("scan"); err != nil {
		return fmt.Errorf("write(kmemleak, scan) failed: %v", err)
	}
	time.Sleep(5 * time.Second) // account for MSECS_MIN_AGE
	if err := send("scan"); err != nil {
		return fmt.Errorf("write(kmemleak, scan) failed: %v", err)
	}
	if err := send("clear"); err != nil {
		return fmt.Errorf("write(kmemleak, clear) failed: %v", err)
	}
	return nil
}
464
465func callbackLeakChecking() {
466	start := time.Now()
467	fd, err := syscall.Open("/sys/kernel/debug/kmemleak", syscall.O_RDWR, 0)
468	if err != nil {
469		panic(err)
470	}
471	defer syscall.Close(fd)
472	// KMEMLEAK has false positives. To mitigate most of them, it checksums
473	// potentially leaked objects, and reports them only on the next scan
474	// iff the checksum does not change. Because of that we do the following
475	// intricate dance:
476	// Scan, sleep, scan again. At this point we can get some leaks.
477	// If there are leaks, we sleep and scan again, this can remove
478	// false leaks. Then, read kmemleak again. If we get leaks now, then
479	// hopefully these are true positives during the previous testing cycle.
480	if _, err := syscall.Write(fd, []byte("scan")); err != nil {
481		panic(err)
482	}
483	time.Sleep(time.Second)
484	// Account for MSECS_MIN_AGE
485	// (1 second less because scanning will take at least a second).
486	for time.Since(start) < 4*time.Second {
487		time.Sleep(time.Second)
488	}
489	if _, err := syscall.Write(fd, []byte("scan")); err != nil {
490		panic(err)
491	}
492	buf := make([]byte, 128<<10)
493	n, err := syscall.Read(fd, buf)
494	if err != nil {
495		panic(err)
496	}
497	if n != 0 {
498		time.Sleep(time.Second)
499		if _, err := syscall.Write(fd, []byte("scan")); err != nil {
500			panic(err)
501		}
502		n, err := syscall.Read(fd, buf)
503		if err != nil {
504			panic(err)
505		}
506		nleaks := 0
507		for buf = buf[:n]; len(buf) != 0; {
508			end := bytes.Index(buf[1:], []byte("unreferenced object"))
509			if end != -1 {
510				end++
511			} else {
512				end = len(buf)
513			}
514			report := buf[:end]
515			buf = buf[end:]
516			if kmemleakIgnore(report) {
517				continue
518			}
519			// BUG in output should be recognized by manager.
520			fmt.Printf("BUG: memory leak\n%s\n", report)
521			nleaks++
522		}
523		if nleaks != 0 {
524			os.Exit(1)
525		}
526	}
527	if _, err := syscall.Write(fd, []byte("clear")); err != nil {
528		panic(err)
529	}
530}
531
// kmemleakIgnore reports whether a single kmemleak report should be dropped.
//
// kmemleak has a bunch of false positives (at least what looks like
// false positives at first glance), so we are conservative with what we report:
// a report is dropped unless the allocation comes from an executor process,
// and also when it mentions any function from a fixed ignore list.
// Ideally, someone should debug/fix all these cases and remove the ignores.
func kmemleakIgnore(report []byte) bool {
	if !bytes.Contains(report, []byte(`comm "syz-executor`)) {
		return true
	}
	ignored := [...]string{
		" copy_process",
		" do_execveat_common",
		" __ext4_",
		" get_empty_filp",
		" do_filp_open",
		" new_inode",
	}
	for i := 0; i < len(ignored); i++ {
		if bytes.Contains(report, []byte(ignored[i])) {
			return true
		}
	}
	return false
}
555
556func checkSandboxNamespace() string {
557	if err := osutil.IsAccessible("/proc/self/ns/user"); err != nil {
558		return err.Error()
559	}
560	return ""
561}
562
563func checkNetworkInjection() string {
564	if err := osutil.IsAccessible("/dev/net/tun"); err != nil {
565		return err.Error()
566	}
567	return checkNetworkDevices()
568}
569
// checkNetworkDevices returns an empty string if the "ip" command can be found
// in PATH, and a fixed reason otherwise.
func checkNetworkDevices() string {
	_, err := exec.LookPath("ip")
	if err == nil {
		return ""
	}
	return "ip command is not found"
}
576
577func checkDebugFS() string {
578	if err := osutil.IsAccessible("/sys/kernel/debug"); err != nil {
579		return "debugfs is not enabled or not mounted"
580	}
581	return ""
582}
583