1// Copyright 2017 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package main
5
6import (
7	"fmt"
8	"io/ioutil"
9	"os"
10	"path/filepath"
11	"time"
12
13	"github.com/google/syzkaller/dashboard/dashapi"
14	"github.com/google/syzkaller/pkg/build"
15	"github.com/google/syzkaller/pkg/instance"
16	"github.com/google/syzkaller/pkg/log"
17	"github.com/google/syzkaller/pkg/mgrconfig"
18	"github.com/google/syzkaller/pkg/osutil"
19	"github.com/google/syzkaller/pkg/vcs"
20)
21
22type JobProcessor struct {
23	name            string
24	managers        []*Manager
25	dash            *dashapi.Dashboard
26	syzkallerRepo   string
27	syzkallerBranch string
28}
29
30func newJobProcessor(cfg *Config, managers []*Manager) *JobProcessor {
31	jp := &JobProcessor{
32		name:            fmt.Sprintf("%v-job", cfg.Name),
33		managers:        managers,
34		syzkallerRepo:   cfg.SyzkallerRepo,
35		syzkallerBranch: cfg.SyzkallerBranch,
36	}
37	if cfg.DashboardAddr != "" && cfg.DashboardClient != "" {
38		jp.dash = dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
39	}
40	return jp
41}
42
43func (jp *JobProcessor) loop(stop chan struct{}) {
44	if jp.dash == nil {
45		return
46	}
47	ticker := time.NewTicker(time.Minute)
48	defer ticker.Stop()
49	for {
50		select {
51		case <-ticker.C:
52			jp.poll()
53		case <-stop:
54			log.Logf(0, "job loop stopped")
55			return
56		}
57	}
58}
59
60func (jp *JobProcessor) poll() {
61	var names []string
62	for _, mgr := range jp.managers {
63		names = append(names, mgr.name)
64	}
65	req, err := jp.dash.JobPoll(names)
66	if err != nil {
67		jp.Errorf("failed to poll jobs: %v", err)
68		return
69	}
70	if req.ID == "" {
71		return
72	}
73	var mgr *Manager
74	for _, m := range jp.managers {
75		if m.name == req.Manager {
76			mgr = m
77			break
78		}
79	}
80	if mgr == nil {
81		jp.Errorf("got job for unknown manager: %v", req.Manager)
82		return
83	}
84	job := &Job{
85		req: req,
86		mgr: mgr,
87	}
88	log.Logf(0, "starting job %v for manager %v on %v/%v",
89		req.ID, req.Manager, req.KernelRepo, req.KernelBranch)
90	resp := jp.process(job)
91	log.Logf(0, "done job %v: commit %v, crash %q, error: %s",
92		resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error)
93	if err := jp.dash.JobDone(resp); err != nil {
94		jp.Errorf("failed to mark job as done: %v", err)
95		return
96	}
97}
98
99type Job struct {
100	req  *dashapi.JobPollResp
101	resp *dashapi.JobDoneReq
102	mgr  *Manager
103}
104
105func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq {
106	req, mgr := job.req, job.mgr
107	build := dashapi.Build{
108		Manager:         mgr.name,
109		ID:              req.ID,
110		OS:              mgr.managercfg.TargetOS,
111		Arch:            mgr.managercfg.TargetArch,
112		VMArch:          mgr.managercfg.TargetVMArch,
113		CompilerID:      mgr.compilerID,
114		KernelRepo:      req.KernelRepo,
115		KernelBranch:    req.KernelBranch,
116		KernelCommit:    "[unknown]",
117		SyzkallerCommit: "[unknown]",
118	}
119	job.resp = &dashapi.JobDoneReq{
120		ID:    req.ID,
121		Build: build,
122	}
123	required := []struct {
124		name string
125		ok   bool
126	}{
127		{"kernel repository", req.KernelRepo != ""},
128		{"kernel branch", req.KernelBranch != ""},
129		{"kernel config", len(req.KernelConfig) != 0},
130		{"syzkaller commit", req.SyzkallerCommit != ""},
131		{"reproducer options", len(req.ReproOpts) != 0},
132		{"reproducer program", len(req.ReproSyz) != 0},
133	}
134	for _, req := range required {
135		if !req.ok {
136			job.resp.Error = []byte(req.name + " is empty")
137			jp.Errorf("%s", job.resp.Error)
138			return job.resp
139		}
140	}
141	// TODO(dvyukov): this will only work for qemu/gce,
142	// because e.g. adb requires unique device IDs and we can't use what
143	// manager already uses. For qemu/gce this is also bad, because we
144	// override resource limits specified in config (e.g. can OOM), but works.
145	switch typ := mgr.managercfg.Type; typ {
146	case "gce", "qemu":
147	default:
148		job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ))
149		jp.Errorf("%s", job.resp.Error)
150		return job.resp
151	}
152	if err := jp.test(job); err != nil {
153		job.resp.Error = []byte(err.Error())
154	}
155	return job.resp
156}
157
158func (jp *JobProcessor) test(job *Job) error {
159	kernelBuildSem <- struct{}{}
160	defer func() { <-kernelBuildSem }()
161	req, resp, mgr := job.req, job.resp, job.mgr
162
163	dir := osutil.Abs(filepath.Join("jobs", mgr.managercfg.TargetOS))
164	kernelDir := filepath.Join(dir, "kernel")
165
166	mgrcfg := new(mgrconfig.Config)
167	*mgrcfg = *mgr.managercfg
168	mgrcfg.Name += "-job"
169	mgrcfg.Workdir = filepath.Join(dir, "workdir")
170	mgrcfg.KernelSrc = kernelDir
171	mgrcfg.Syzkaller = filepath.Join(dir, "gopath", "src", "github.com", "google", "syzkaller")
172
173	os.RemoveAll(mgrcfg.Workdir)
174	defer os.RemoveAll(mgrcfg.Workdir)
175
176	env, err := instance.NewEnv(mgrcfg)
177	if err != nil {
178		return err
179	}
180	log.Logf(0, "job: building syzkaller on %v...", req.SyzkallerCommit)
181	resp.Build.SyzkallerCommit = req.SyzkallerCommit
182	if err := env.BuildSyzkaller(jp.syzkallerRepo, req.SyzkallerCommit); err != nil {
183		return err
184	}
185
186	log.Logf(0, "job: fetching kernel...")
187	repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, kernelDir)
188	if err != nil {
189		return fmt.Errorf("failed to create kernel repo: %v", err)
190	}
191	var kernelCommit *vcs.Commit
192	if vcs.CheckCommitHash(req.KernelBranch) {
193		kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, req.KernelBranch)
194		if err != nil {
195			return fmt.Errorf("failed to checkout kernel repo %v on commit %v: %v",
196				req.KernelRepo, req.KernelBranch, err)
197		}
198		resp.Build.KernelBranch = ""
199	} else {
200		kernelCommit, err = repo.CheckoutBranch(req.KernelRepo, req.KernelBranch)
201		if err != nil {
202			return fmt.Errorf("failed to checkout kernel repo %v/%v: %v",
203				req.KernelRepo, req.KernelBranch, err)
204		}
205	}
206	resp.Build.KernelCommit = kernelCommit.Hash
207	resp.Build.KernelCommitTitle = kernelCommit.Title
208	resp.Build.KernelCommitDate = kernelCommit.Date
209
210	if err := build.Clean(mgrcfg.TargetOS, mgrcfg.TargetVMArch, mgrcfg.Type, kernelDir); err != nil {
211		return fmt.Errorf("kernel clean failed: %v", err)
212	}
213	if len(req.Patch) != 0 {
214		if err := vcs.Patch(kernelDir, req.Patch); err != nil {
215			return err
216		}
217	}
218
219	log.Logf(0, "job: building kernel...")
220	if err := env.BuildKernel(mgr.mgrcfg.Compiler, mgr.mgrcfg.Userspace, mgr.mgrcfg.KernelCmdline,
221		mgr.mgrcfg.KernelSysctl, req.KernelConfig); err != nil {
222		return err
223	}
224	resp.Build.KernelConfig, err = ioutil.ReadFile(filepath.Join(mgrcfg.KernelSrc, ".config"))
225	if err != nil {
226		return fmt.Errorf("failed to read config file: %v", err)
227	}
228
229	log.Logf(0, "job: testing...")
230	results, err := env.Test(3, req.ReproSyz, req.ReproOpts, req.ReproC)
231	if err != nil {
232		return err
233	}
234	// We can have transient errors and other errors of different types.
235	// We need to avoid reporting transient "failed to boot" or "failed to copy binary" errors.
236	// If any of the instances crash during testing, we report this with the highest priority.
237	// Then if any of the runs succeed, we report that (to avoid transient errors).
238	// If all instances failed to boot, then we report one of these errors.
239	anySuccess := false
240	var anyErr, testErr error
241	for _, res := range results {
242		if res == nil {
243			anySuccess = true
244			continue
245		}
246		anyErr = res
247		switch err := res.(type) {
248		case *instance.TestError:
249			// We should not put rep into resp.CrashTitle/CrashReport,
250			// because that will be treated as patch not fixing the bug.
251			if rep := err.Report; rep != nil {
252				testErr = fmt.Errorf("%v\n\n%s\n\n%s", rep.Title, rep.Report, rep.Output)
253			} else {
254				testErr = fmt.Errorf("%v\n\n%s", err.Title, err.Output)
255			}
256		case *instance.CrashError:
257			resp.CrashTitle = err.Report.Title
258			resp.CrashReport = err.Report.Report
259			resp.CrashLog = err.Report.Output
260			return nil
261		}
262	}
263	if anySuccess {
264		return nil
265	}
266	if testErr != nil {
267		return testErr
268	}
269	return anyErr
270}
271
272// Errorf logs non-fatal error and sends it to dashboard.
273func (jp *JobProcessor) Errorf(msg string, args ...interface{}) {
274	log.Logf(0, "job: "+msg, args...)
275	if jp.dash != nil {
276		jp.dash.LogError(jp.name, msg, args...)
277	}
278}
279