1// Copyright 2017 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package main
5
6import (
7	"fmt"
8	"io/ioutil"
9	"os"
10	"path/filepath"
11	"time"
12
13	"github.com/google/syzkaller/dashboard/dashapi"
14	"github.com/google/syzkaller/pkg/build"
15	"github.com/google/syzkaller/pkg/config"
16	"github.com/google/syzkaller/pkg/hash"
17	"github.com/google/syzkaller/pkg/instance"
18	"github.com/google/syzkaller/pkg/log"
19	"github.com/google/syzkaller/pkg/mgrconfig"
20	"github.com/google/syzkaller/pkg/osutil"
21	"github.com/google/syzkaller/pkg/report"
22	"github.com/google/syzkaller/pkg/vcs"
23)
24
25// This is especially slightly longer than syzkaller rebuild period.
26// If we set kernelRebuildPeriod = syzkallerRebuildPeriod and both are changed
27// during that period (or around that period), we can rebuild kernel, restart
28// manager and then instantly shutdown everything for syzkaller update.
29// Instead we rebuild syzkaller, restart and then rebuild kernel.
30const kernelRebuildPeriod = syzkallerRebuildPeriod + time.Hour
31
// imageFiles enumerates the files of a kernel build (contents of the
// latest/current dirs). The map is handed to osutil.FilesExist and
// osutil.LinkFiles; presumably true marks a mandatory file and false an
// optional one (confirm against pkg/osutil).
var imageFiles = map[string]bool{
	// Entries flagged true.
	"tag":   true, // serialized BuildInfo
	"image": true, // kernel image

	// Entries flagged false.
	"kernel.config":         false, // kernel config used for build
	"kernel":                false,
	"initrd":                false,
	"key":                   false, // root ssh key for the image
	"obj/vmlinux":           false, // Linux object file with debug info
	"obj/zircon.elf":        false, // Zircon object file with debug info
	"obj/akaros-kernel-64b": false, // Akaros object file with debug info
}
44
45// Manager represents a single syz-manager instance.
46// Handles kernel polling, image rebuild and manager process management.
47// As syzkaller builder, it maintains 2 builds:
48//  - latest: latest known good kernel build
49//  - current: currently used kernel build
50type Manager struct {
51	name            string
52	workDir         string
53	kernelDir       string
54	currentDir      string
55	latestDir       string
56	compilerID      string
57	syzkallerCommit string
58	configTag       string
59	configData      []byte
60	cfg             *Config
61	repo            vcs.Repo
62	mgrcfg          *ManagerConfig
63	managercfg      *mgrconfig.Config
64	cmd             *ManagerCmd
65	dash            *dashapi.Dashboard
66	stop            chan struct{}
67}
68
69func createManager(cfg *Config, mgrcfg *ManagerConfig, stop chan struct{}) *Manager {
70	dir := osutil.Abs(filepath.Join("managers", mgrcfg.Name))
71	if err := osutil.MkdirAll(dir); err != nil {
72		log.Fatal(err)
73	}
74	if mgrcfg.RepoAlias == "" {
75		mgrcfg.RepoAlias = mgrcfg.Repo
76	}
77
78	var dash *dashapi.Dashboard
79	if cfg.DashboardAddr != "" && mgrcfg.DashboardClient != "" {
80		dash = dashapi.New(mgrcfg.DashboardClient, cfg.DashboardAddr, mgrcfg.DashboardKey)
81	}
82
83	// Assume compiler and config don't change underneath us.
84	compilerID, err := build.CompilerIdentity(mgrcfg.Compiler)
85	if err != nil {
86		log.Fatal(err)
87	}
88	var configData []byte
89	if mgrcfg.KernelConfig != "" {
90		if configData, err = ioutil.ReadFile(mgrcfg.KernelConfig); err != nil {
91			log.Fatal(err)
92		}
93	}
94	syzkallerCommit, _ := readTag(filepath.FromSlash("syzkaller/current/tag"))
95	if syzkallerCommit == "" {
96		log.Fatalf("no tag in syzkaller/current/tag")
97	}
98
99	// Prepare manager config skeleton (other fields are filled in writeConfig).
100	managercfg, err := mgrconfig.LoadPartialData(mgrcfg.ManagerConfig)
101	if err != nil {
102		log.Fatalf("failed to load manager %v config: %v", mgrcfg.Name, err)
103	}
104	managercfg.Name = cfg.Name + "-" + mgrcfg.Name
105	managercfg.Syzkaller = filepath.FromSlash("syzkaller/current")
106
107	kernelDir := filepath.Join(dir, "kernel")
108	repo, err := vcs.NewRepo(managercfg.TargetOS, managercfg.Type, kernelDir)
109	if err != nil {
110		log.Fatalf("failed to create repo for %v: %v", mgrcfg.Name, err)
111	}
112
113	mgr := &Manager{
114		name:            managercfg.Name,
115		workDir:         filepath.Join(dir, "workdir"),
116		kernelDir:       kernelDir,
117		currentDir:      filepath.Join(dir, "current"),
118		latestDir:       filepath.Join(dir, "latest"),
119		compilerID:      compilerID,
120		syzkallerCommit: syzkallerCommit,
121		configTag:       hash.String(configData),
122		configData:      configData,
123		cfg:             cfg,
124		repo:            repo,
125		mgrcfg:          mgrcfg,
126		managercfg:      managercfg,
127		dash:            dash,
128		stop:            stop,
129	}
130	os.RemoveAll(mgr.currentDir)
131	return mgr
132}
133
// kernelBuildSem gates kernel builds: a build holds the single slot of this
// channel for its whole duration (see Manager.loop).
// Kernel builds take the whole machine, so we don't run more than one at a time.
// Also the current image build script uses some global resources (/dev/nbd0)
// and can't run in parallel.
var kernelBuildSem = make(chan struct{}, 1)
138
139func (mgr *Manager) loop() {
140	lastCommit := ""
141	nextBuildTime := time.Now()
142	var managerRestartTime time.Time
143	latestInfo := mgr.checkLatest()
144	if latestInfo != nil && time.Since(latestInfo.Time) < kernelRebuildPeriod/2 {
145		// If we have a reasonably fresh build,
146		// start manager straight away and don't rebuild kernel for a while.
147		log.Logf(0, "%v: using latest image built on %v", mgr.name, latestInfo.KernelCommit)
148		managerRestartTime = latestInfo.Time
149		nextBuildTime = time.Now().Add(kernelRebuildPeriod)
150		mgr.restartManager()
151	} else if latestInfo != nil {
152		log.Logf(0, "%v: latest image is on %v", mgr.name, latestInfo.KernelCommit)
153	}
154
155	ticker := time.NewTicker(buildRetryPeriod)
156	defer ticker.Stop()
157
158loop:
159	for {
160		if time.Since(nextBuildTime) >= 0 {
161			rebuildAfter := buildRetryPeriod
162			commit, err := mgr.repo.Poll(mgr.mgrcfg.Repo, mgr.mgrcfg.Branch)
163			if err != nil {
164				mgr.Errorf("failed to poll: %v", err)
165			} else {
166				log.Logf(0, "%v: poll: %v", mgr.name, commit.Hash)
167				if commit.Hash != lastCommit &&
168					(latestInfo == nil ||
169						commit.Hash != latestInfo.KernelCommit ||
170						mgr.compilerID != latestInfo.CompilerID ||
171						mgr.configTag != latestInfo.KernelConfigTag) {
172					lastCommit = commit.Hash
173					select {
174					case kernelBuildSem <- struct{}{}:
175						log.Logf(0, "%v: building kernel...", mgr.name)
176						if err := mgr.build(commit); err != nil {
177							log.Logf(0, "%v: %v", mgr.name, err)
178						} else {
179							log.Logf(0, "%v: build successful, [re]starting manager", mgr.name)
180							rebuildAfter = kernelRebuildPeriod
181							latestInfo = mgr.checkLatest()
182							if latestInfo == nil {
183								mgr.Errorf("failed to read build info after build")
184							}
185						}
186						<-kernelBuildSem
187					case <-mgr.stop:
188						break loop
189					}
190				}
191			}
192			nextBuildTime = time.Now().Add(rebuildAfter)
193		}
194
195		select {
196		case <-mgr.stop:
197			break loop
198		default:
199		}
200
201		if latestInfo != nil && (latestInfo.Time != managerRestartTime || mgr.cmd == nil) {
202			managerRestartTime = latestInfo.Time
203			mgr.restartManager()
204		}
205
206		select {
207		case <-ticker.C:
208		case <-mgr.stop:
209			break loop
210		}
211	}
212
213	if mgr.cmd != nil {
214		mgr.cmd.Close()
215		mgr.cmd = nil
216	}
217	log.Logf(0, "%v: stopped", mgr.name)
218}
219
// BuildInfo describes a kernel build. It is what gets serialized into the
// "tag" file of an image dir (see Manager.build and loadBuildInfo).
type BuildInfo struct {
	// Time is when the build was done.
	Time time.Time

	// Tag is a unique tag combined from compiler id, kernel commit and config tag.
	// CompilerID is the compiler identity string (e.g. "gcc 7.1.1").
	Tag, CompilerID string

	// Kernel repo address and branch the build was made from.
	KernelRepo, KernelBranch string

	// KernelCommit is the git hash of the kernel checkout,
	// KernelCommitTitle is the title of that commit.
	KernelCommit, KernelCommitTitle string

	// KernelCommitDate is the date of the kernel commit.
	KernelCommitDate time.Time

	// KernelConfigTag is the SHA1 hash of .config contents.
	KernelConfigTag string
}
232
233func loadBuildInfo(dir string) (*BuildInfo, error) {
234	info := new(BuildInfo)
235	if err := config.LoadFile(filepath.Join(dir, "tag"), info); err != nil {
236		return nil, err
237	}
238	return info, nil
239}
240
241// checkLatest checks if we have a good working latest build and returns its build info.
242// If the build is missing/broken, nil is returned.
243func (mgr *Manager) checkLatest() *BuildInfo {
244	if !osutil.FilesExist(mgr.latestDir, imageFiles) {
245		return nil
246	}
247	info, _ := loadBuildInfo(mgr.latestDir)
248	return info
249}
250
251func (mgr *Manager) build(kernelCommit *vcs.Commit) error {
252	var tagData []byte
253	tagData = append(tagData, mgr.name...)
254	tagData = append(tagData, kernelCommit.Hash...)
255	tagData = append(tagData, mgr.compilerID...)
256	tagData = append(tagData, mgr.configTag...)
257	info := &BuildInfo{
258		Time:              time.Now(),
259		Tag:               hash.String(tagData),
260		CompilerID:        mgr.compilerID,
261		KernelRepo:        mgr.mgrcfg.Repo,
262		KernelBranch:      mgr.mgrcfg.Branch,
263		KernelCommit:      kernelCommit.Hash,
264		KernelCommitTitle: kernelCommit.Title,
265		KernelCommitDate:  kernelCommit.Date,
266		KernelConfigTag:   mgr.configTag,
267	}
268
269	// We first form the whole image in tmp dir and then rename it to latest.
270	tmpDir := mgr.latestDir + ".tmp"
271	if err := os.RemoveAll(tmpDir); err != nil {
272		return fmt.Errorf("failed to remove tmp dir: %v", err)
273	}
274	if err := osutil.MkdirAll(tmpDir); err != nil {
275		return fmt.Errorf("failed to create tmp dir: %v", err)
276	}
277	if err := config.SaveFile(filepath.Join(tmpDir, "tag"), info); err != nil {
278		return fmt.Errorf("failed to write tag file: %v", err)
279	}
280	if err := build.Image(mgr.managercfg.TargetOS, mgr.managercfg.TargetVMArch, mgr.managercfg.Type,
281		mgr.kernelDir, tmpDir, mgr.mgrcfg.Compiler, mgr.mgrcfg.Userspace,
282		mgr.mgrcfg.KernelCmdline, mgr.mgrcfg.KernelSysctl, mgr.configData); err != nil {
283		if _, ok := err.(build.KernelBuildError); ok {
284			rep := &report.Report{
285				Title:  fmt.Sprintf("%v build error", mgr.mgrcfg.RepoAlias),
286				Output: []byte(err.Error()),
287			}
288			if err := mgr.reportBuildError(rep, info, tmpDir); err != nil {
289				mgr.Errorf("failed to report image error: %v", err)
290			}
291		}
292		return fmt.Errorf("kernel build failed: %v", err)
293	}
294
295	if err := mgr.testImage(tmpDir, info); err != nil {
296		return err
297	}
298
299	// Now try to replace latest with our tmp dir as atomically as we can get on Linux.
300	if err := os.RemoveAll(mgr.latestDir); err != nil {
301		return fmt.Errorf("failed to remove latest dir: %v", err)
302	}
303	return os.Rename(tmpDir, mgr.latestDir)
304}
305
306func (mgr *Manager) restartManager() {
307	if !osutil.FilesExist(mgr.latestDir, imageFiles) {
308		mgr.Errorf("can't start manager, image files missing")
309		return
310	}
311	if mgr.cmd != nil {
312		mgr.cmd.Close()
313		mgr.cmd = nil
314	}
315	if err := osutil.LinkFiles(mgr.latestDir, mgr.currentDir, imageFiles); err != nil {
316		mgr.Errorf("failed to create current image dir: %v", err)
317		return
318	}
319	info, err := loadBuildInfo(mgr.currentDir)
320	if err != nil {
321		mgr.Errorf("failed to load build info: %v", err)
322		return
323	}
324	buildTag, err := mgr.uploadBuild(info, mgr.currentDir)
325	if err != nil {
326		mgr.Errorf("failed to upload build: %v", err)
327		return
328	}
329	cfgFile, err := mgr.writeConfig(buildTag)
330	if err != nil {
331		mgr.Errorf("failed to create manager config: %v", err)
332		return
333	}
334	bin := filepath.FromSlash("syzkaller/current/bin/syz-manager")
335	logFile := filepath.Join(mgr.currentDir, "manager.log")
336	mgr.cmd = NewManagerCmd(mgr.name, logFile, mgr.Errorf, bin, "-config", cfgFile)
337}
338
339func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
340	log.Logf(0, "%v: testing image...", mgr.name)
341	mgrcfg, err := mgr.createTestConfig(imageDir, info)
342	if err != nil {
343		return fmt.Errorf("failed to create manager config: %v", err)
344	}
345	defer os.RemoveAll(mgrcfg.Workdir)
346	switch typ := mgrcfg.Type; typ {
347	case "gce", "qemu", "gvisor":
348	default:
349		// Other types don't support creating machines out of thin air.
350		return nil
351	}
352	env, err := instance.NewEnv(mgrcfg)
353	if err != nil {
354		return err
355	}
356	const (
357		testVMs     = 3
358		maxFailures = 1
359	)
360	results, err := env.Test(testVMs, nil, nil, nil)
361	if err != nil {
362		return err
363	}
364	failures := 0
365	var failureErr error
366	for _, res := range results {
367		if res == nil {
368			continue
369		}
370		failures++
371		switch err := res.(type) {
372		case *instance.TestError:
373			if rep := err.Report; rep != nil {
374				rep.Report = append([]byte(rep.Title), rep.Report...)
375				if err.Boot {
376					rep.Title = fmt.Sprintf("%v boot error", mgr.mgrcfg.RepoAlias)
377				} else {
378					rep.Title = fmt.Sprintf("%v test error", mgr.mgrcfg.RepoAlias)
379				}
380				if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
381					mgr.Errorf("failed to report image error: %v", err)
382				}
383			}
384			if err.Boot {
385				failureErr = fmt.Errorf("VM boot failed with: %v", err)
386			} else {
387				failureErr = fmt.Errorf("VM testing failed with: %v", err)
388			}
389		default:
390			failureErr = res
391		}
392	}
393	if failures > maxFailures {
394		return failureErr
395	}
396	return nil
397}
398
399func (mgr *Manager) reportBuildError(rep *report.Report, info *BuildInfo, imageDir string) error {
400	if mgr.dash == nil {
401		log.Logf(0, "%v: image testing failed: %v\n\n%s\n\n%s\n",
402			mgr.name, rep.Title, rep.Report, rep.Output)
403		return nil
404	}
405	build, err := mgr.createDashboardBuild(info, imageDir, "error")
406	if err != nil {
407		return err
408	}
409	req := &dashapi.BuildErrorReq{
410		Build: *build,
411		Crash: dashapi.Crash{
412			Title:       rep.Title,
413			Corrupted:   false, // Otherwise they get merged with other corrupted reports.
414			Maintainers: rep.Maintainers,
415			Log:         rep.Output,
416			Report:      rep.Report,
417		},
418	}
419	return mgr.dash.ReportBuildError(req)
420}
421
422func (mgr *Manager) createTestConfig(imageDir string, info *BuildInfo) (*mgrconfig.Config, error) {
423	mgrcfg := new(mgrconfig.Config)
424	*mgrcfg = *mgr.managercfg
425	mgrcfg.Name += "-test"
426	mgrcfg.Tag = info.KernelCommit
427	mgrcfg.Workdir = filepath.Join(imageDir, "workdir")
428	if err := instance.SetConfigImage(mgrcfg, imageDir); err != nil {
429		return nil, err
430	}
431	mgrcfg.KernelSrc = mgr.kernelDir
432	if err := mgrconfig.Complete(mgrcfg); err != nil {
433		return nil, fmt.Errorf("bad manager config: %v", err)
434	}
435	return mgrcfg, nil
436}
437
438func (mgr *Manager) writeConfig(buildTag string) (string, error) {
439	mgrcfg := new(mgrconfig.Config)
440	*mgrcfg = *mgr.managercfg
441
442	if mgr.dash != nil {
443		mgrcfg.DashboardClient = mgr.dash.Client
444		mgrcfg.DashboardAddr = mgr.dash.Addr
445		mgrcfg.DashboardKey = mgr.dash.Key
446	}
447	if mgr.cfg.HubAddr != "" {
448		mgrcfg.HubClient = mgr.cfg.Name
449		mgrcfg.HubAddr = mgr.cfg.HubAddr
450		mgrcfg.HubKey = mgr.cfg.HubKey
451	}
452	mgrcfg.Tag = buildTag
453	mgrcfg.Workdir = mgr.workDir
454	if err := instance.SetConfigImage(mgrcfg, mgr.currentDir); err != nil {
455		return "", err
456	}
457	// Strictly saying this is somewhat racy as builder can concurrently
458	// update the source, or even delete and re-clone. If this causes
459	// problems, we need to make a copy of sources after build.
460	mgrcfg.KernelSrc = mgr.kernelDir
461	if err := mgrconfig.Complete(mgrcfg); err != nil {
462		return "", fmt.Errorf("bad manager config: %v", err)
463	}
464	configFile := filepath.Join(mgr.currentDir, "manager.cfg")
465	if err := config.SaveFile(configFile, mgrcfg); err != nil {
466		return "", err
467	}
468	return configFile, nil
469}
470
471func (mgr *Manager) uploadBuild(info *BuildInfo, imageDir string) (string, error) {
472	if mgr.dash == nil {
473		// Dashboard identifies builds by unique tags that are combined
474		// from kernel tag, compiler tag and config tag.
475		// This combined tag is meaningless without dashboard,
476		// so we use kenrel tag (commit tag) because it communicates
477		// at least some useful information.
478		return info.KernelCommit, nil
479	}
480
481	build, err := mgr.createDashboardBuild(info, imageDir, "normal")
482	if err != nil {
483		return "", err
484	}
485	commitTitles, fixCommits, err := mgr.pollCommits(info.KernelCommit)
486	if err != nil {
487		// This is not critical for operation.
488		mgr.Errorf("failed to poll commits: %v", err)
489	}
490	build.Commits = commitTitles
491	build.FixCommits = fixCommits
492	if err := mgr.dash.UploadBuild(build); err != nil {
493		return "", err
494	}
495	return build.ID, nil
496}
497
498func (mgr *Manager) createDashboardBuild(info *BuildInfo, imageDir, typ string) (*dashapi.Build, error) {
499	var kernelConfig []byte
500	if kernelConfigFile := filepath.Join(imageDir, "kernel.config"); osutil.IsExist(kernelConfigFile) {
501		var err error
502		if kernelConfig, err = ioutil.ReadFile(kernelConfigFile); err != nil {
503			return nil, fmt.Errorf("failed to read kernel.config: %v", err)
504		}
505	}
506	// Resulting build depends on both kernel build tag and syzkaller commmit.
507	// Also mix in build type, so that image error builds are not merged into normal builds.
508	var tagData []byte
509	tagData = append(tagData, info.Tag...)
510	tagData = append(tagData, mgr.syzkallerCommit...)
511	tagData = append(tagData, typ...)
512	build := &dashapi.Build{
513		Manager:           mgr.name,
514		ID:                hash.String(tagData),
515		OS:                mgr.managercfg.TargetOS,
516		Arch:              mgr.managercfg.TargetArch,
517		VMArch:            mgr.managercfg.TargetVMArch,
518		SyzkallerCommit:   mgr.syzkallerCommit,
519		CompilerID:        info.CompilerID,
520		KernelRepo:        info.KernelRepo,
521		KernelBranch:      info.KernelBranch,
522		KernelCommit:      info.KernelCommit,
523		KernelCommitTitle: info.KernelCommitTitle,
524		KernelCommitDate:  info.KernelCommitDate,
525		KernelConfig:      kernelConfig,
526	}
527	return build, nil
528}
529
530// pollCommits asks dashboard what commits it is interested in (i.e. fixes for
531// open bugs) and returns subset of these commits that are present in a build
532// on commit buildCommit.
533func (mgr *Manager) pollCommits(buildCommit string) ([]string, []dashapi.FixCommit, error) {
534	resp, err := mgr.dash.BuilderPoll(mgr.name)
535	if err != nil || len(resp.PendingCommits) == 0 && resp.ReportEmail == "" {
536		return nil, nil, err
537	}
538	var present []string
539	if len(resp.PendingCommits) != 0 {
540		commits, err := mgr.repo.ListRecentCommits(buildCommit)
541		if err != nil {
542			return nil, nil, err
543		}
544		m := make(map[string]bool, len(commits))
545		for _, com := range commits {
546			m[vcs.CanonicalizeCommit(com)] = true
547		}
548		for _, com := range resp.PendingCommits {
549			if m[vcs.CanonicalizeCommit(com)] {
550				present = append(present, com)
551			}
552		}
553	}
554	var fixCommits []dashapi.FixCommit
555	if resp.ReportEmail != "" {
556		// TODO(dvyukov): mmots contains weird squashed commits titled "linux-next" or "origin",
557		// which contain hundreds of other commits. This makes fix attribution totally broken.
558		if mgr.mgrcfg.Repo != "git://git.cmpxchg.org/linux-mmots.git" {
559			commits, err := mgr.repo.ExtractFixTagsFromCommits(buildCommit, resp.ReportEmail)
560			if err != nil {
561				return nil, nil, err
562			}
563			for _, com := range commits {
564				fixCommits = append(fixCommits, dashapi.FixCommit{
565					Title: com.Title,
566					BugID: com.Tag,
567				})
568			}
569		}
570	}
571	return present, fixCommits, nil
572}
573
574// Errorf logs non-fatal error and sends it to dashboard.
575func (mgr *Manager) Errorf(msg string, args ...interface{}) {
576	log.Logf(0, mgr.name+": "+msg, args...)
577	if mgr.dash != nil {
578		mgr.dash.LogError(mgr.name, msg, args...)
579	}
580}
581