1// Copyright 2017 syzkaller project authors. All rights reserved. 2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4package main 5 6import ( 7 "fmt" 8 "io/ioutil" 9 "os" 10 "path/filepath" 11 "time" 12 13 "github.com/google/syzkaller/dashboard/dashapi" 14 "github.com/google/syzkaller/pkg/build" 15 "github.com/google/syzkaller/pkg/instance" 16 "github.com/google/syzkaller/pkg/log" 17 "github.com/google/syzkaller/pkg/mgrconfig" 18 "github.com/google/syzkaller/pkg/osutil" 19 "github.com/google/syzkaller/pkg/vcs" 20) 21 22type JobProcessor struct { 23 name string 24 managers []*Manager 25 dash *dashapi.Dashboard 26 syzkallerRepo string 27 syzkallerBranch string 28} 29 30func newJobProcessor(cfg *Config, managers []*Manager) *JobProcessor { 31 jp := &JobProcessor{ 32 name: fmt.Sprintf("%v-job", cfg.Name), 33 managers: managers, 34 syzkallerRepo: cfg.SyzkallerRepo, 35 syzkallerBranch: cfg.SyzkallerBranch, 36 } 37 if cfg.DashboardAddr != "" && cfg.DashboardClient != "" { 38 jp.dash = dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey) 39 } 40 return jp 41} 42 43func (jp *JobProcessor) loop(stop chan struct{}) { 44 if jp.dash == nil { 45 return 46 } 47 ticker := time.NewTicker(time.Minute) 48 defer ticker.Stop() 49 for { 50 select { 51 case <-ticker.C: 52 jp.poll() 53 case <-stop: 54 log.Logf(0, "job loop stopped") 55 return 56 } 57 } 58} 59 60func (jp *JobProcessor) poll() { 61 var names []string 62 for _, mgr := range jp.managers { 63 names = append(names, mgr.name) 64 } 65 req, err := jp.dash.JobPoll(names) 66 if err != nil { 67 jp.Errorf("failed to poll jobs: %v", err) 68 return 69 } 70 if req.ID == "" { 71 return 72 } 73 var mgr *Manager 74 for _, m := range jp.managers { 75 if m.name == req.Manager { 76 mgr = m 77 break 78 } 79 } 80 if mgr == nil { 81 jp.Errorf("got job for unknown manager: %v", req.Manager) 82 return 83 } 84 job := &Job{ 85 req: req, 86 mgr: mgr, 87 } 88 log.Logf(0, "starting job %v for manager %v on %v/%v", 89 req.ID, req.Manager, req.KernelRepo, req.KernelBranch) 90 resp := jp.process(job) 91 log.Logf(0, "done job %v: commit %v, crash %q, error: %s", 92 resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error) 93 if err := jp.dash.JobDone(resp); err != nil { 94 jp.Errorf("failed to mark job as done: %v", err) 95 return 96 } 97} 98 99type Job struct { 100 req *dashapi.JobPollResp 101 resp *dashapi.JobDoneReq 102 mgr *Manager 103} 104 105func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq { 106 req, mgr := job.req, job.mgr 107 build := dashapi.Build{ 108 Manager: mgr.name, 109 ID: req.ID, 110 OS: mgr.managercfg.TargetOS, 111 Arch: mgr.managercfg.TargetArch, 112 VMArch: mgr.managercfg.TargetVMArch, 113 CompilerID: mgr.compilerID, 114 KernelRepo: req.KernelRepo, 115 KernelBranch: req.KernelBranch, 116 KernelCommit: "[unknown]", 117 SyzkallerCommit: "[unknown]", 118 } 119 job.resp = &dashapi.JobDoneReq{ 120 ID: req.ID, 121 Build: build, 122 } 123 required := []struct { 124 name string 125 ok bool 126 }{ 127 {"kernel repository", req.KernelRepo != ""}, 128 {"kernel branch", req.KernelBranch != ""}, 129 {"kernel config", len(req.KernelConfig) != 0}, 130 {"syzkaller commit", req.SyzkallerCommit != ""}, 131 {"reproducer options", len(req.ReproOpts) != 0}, 132 {"reproducer program", len(req.ReproSyz) != 0}, 133 } 134 for _, req := range required { 135 if !req.ok { 136 job.resp.Error = []byte(req.name + " is empty") 137 jp.Errorf("%s", job.resp.Error) 138 return job.resp 139 } 140 } 141 // TODO(dvyukov): this will only work for qemu/gce, 142 // because e.g. adb requires unique device IDs and we can't use what 143 // manager already uses. For qemu/gce this is also bad, because we 144 // override resource limits specified in config (e.g. can OOM), but works. 145 switch typ := mgr.managercfg.Type; typ { 146 case "gce", "qemu": 147 default: 148 job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ)) 149 jp.Errorf("%s", job.resp.Error) 150 return job.resp 151 } 152 if err := jp.test(job); err != nil { 153 job.resp.Error = []byte(err.Error()) 154 } 155 return job.resp 156} 157 158func (jp *JobProcessor) test(job *Job) error { 159 kernelBuildSem <- struct{}{} 160 defer func() { <-kernelBuildSem }() 161 req, resp, mgr := job.req, job.resp, job.mgr 162 163 dir := osutil.Abs(filepath.Join("jobs", mgr.managercfg.TargetOS)) 164 kernelDir := filepath.Join(dir, "kernel") 165 166 mgrcfg := new(mgrconfig.Config) 167 *mgrcfg = *mgr.managercfg 168 mgrcfg.Name += "-job" 169 mgrcfg.Workdir = filepath.Join(dir, "workdir") 170 mgrcfg.KernelSrc = kernelDir 171 mgrcfg.Syzkaller = filepath.Join(dir, "gopath", "src", "github.com", "google", "syzkaller") 172 173 os.RemoveAll(mgrcfg.Workdir) 174 defer os.RemoveAll(mgrcfg.Workdir) 175 176 env, err := instance.NewEnv(mgrcfg) 177 if err != nil { 178 return err 179 } 180 log.Logf(0, "job: building syzkaller on %v...", req.SyzkallerCommit) 181 resp.Build.SyzkallerCommit = req.SyzkallerCommit 182 if err := env.BuildSyzkaller(jp.syzkallerRepo, req.SyzkallerCommit); err != nil { 183 return err 184 } 185 186 log.Logf(0, "job: fetching kernel...") 187 repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, kernelDir) 188 if err != nil { 189 return fmt.Errorf("failed to create kernel repo: %v", err) 190 } 191 var kernelCommit *vcs.Commit 192 if vcs.CheckCommitHash(req.KernelBranch) { 193 kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, req.KernelBranch) 194 if err != nil { 195 return fmt.Errorf("failed to checkout kernel repo %v on commit %v: %v", 196 req.KernelRepo, req.KernelBranch, err) 197 } 198 resp.Build.KernelBranch = "" 199 } else { 200 kernelCommit, err = repo.CheckoutBranch(req.KernelRepo, req.KernelBranch) 201 if err != nil { 202 return fmt.Errorf("failed to checkout kernel repo %v/%v: %v", 203 req.KernelRepo, req.KernelBranch, err) 204 } 205 } 206 resp.Build.KernelCommit = kernelCommit.Hash 207 resp.Build.KernelCommitTitle = kernelCommit.Title 208 resp.Build.KernelCommitDate = kernelCommit.Date 209 210 if err := build.Clean(mgrcfg.TargetOS, mgrcfg.TargetVMArch, mgrcfg.Type, kernelDir); err != nil { 211 return fmt.Errorf("kernel clean failed: %v", err) 212 } 213 if len(req.Patch) != 0 { 214 if err := vcs.Patch(kernelDir, req.Patch); err != nil { 215 return err 216 } 217 } 218 219 log.Logf(0, "job: building kernel...") 220 if err := env.BuildKernel(mgr.mgrcfg.Compiler, mgr.mgrcfg.Userspace, mgr.mgrcfg.KernelCmdline, 221 mgr.mgrcfg.KernelSysctl, req.KernelConfig); err != nil { 222 return err 223 } 224 resp.Build.KernelConfig, err = ioutil.ReadFile(filepath.Join(mgrcfg.KernelSrc, ".config")) 225 if err != nil { 226 return fmt.Errorf("failed to read config file: %v", err) 227 } 228 229 log.Logf(0, "job: testing...") 230 results, err := env.Test(3, req.ReproSyz, req.ReproOpts, req.ReproC) 231 if err != nil { 232 return err 233 } 234 // We can have transient errors and other errors of different types. 235 // We need to avoid reporting transient "failed to boot" or "failed to copy binary" errors. 236 // If any of the instances crash during testing, we report this with the highest priority. 237 // Then if any of the runs succeed, we report that (to avoid transient errors). 238 // If all instances failed to boot, then we report one of these errors. 239 anySuccess := false 240 var anyErr, testErr error 241 for _, res := range results { 242 if res == nil { 243 anySuccess = true 244 continue 245 } 246 anyErr = res 247 switch err := res.(type) { 248 case *instance.TestError: 249 // We should not put rep into resp.CrashTitle/CrashReport, 250 // because that will be treated as patch not fixing the bug. 251 if rep := err.Report; rep != nil { 252 testErr = fmt.Errorf("%v\n\n%s\n\n%s", rep.Title, rep.Report, rep.Output) 253 } else { 254 testErr = fmt.Errorf("%v\n\n%s", err.Title, err.Output) 255 } 256 case *instance.CrashError: 257 resp.CrashTitle = err.Report.Title 258 resp.CrashReport = err.Report.Report 259 resp.CrashLog = err.Report.Output 260 return nil 261 } 262 } 263 if anySuccess { 264 return nil 265 } 266 if testErr != nil { 267 return testErr 268 } 269 return anyErr 270} 271 272// Errorf logs non-fatal error and sends it to dashboard. 273func (jp *JobProcessor) Errorf(msg string, args ...interface{}) { 274 log.Logf(0, "job: "+msg, args...) 275 if jp.dash != nil { 276 jp.dash.LogError(jp.name, msg, args...) 277 } 278} 279