1// Copyright 2016 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4// Package report contains functions that process kernel output,
5// detect/extract crash messages, symbolize them, etc.
6package report
7
8import (
9	"bufio"
10	"bytes"
11	"fmt"
12	"regexp"
13	"strings"
14
15	"github.com/google/syzkaller/pkg/mgrconfig"
16)
17
// Reporter detects and analyzes crashes in kernel console output.
// Instances are created with NewReporter.
type Reporter interface {
	// ContainsCrash searches kernel console output for oops messages.
	ContainsCrash(output []byte) bool

	// Parse extracts information about oops from console output.
	// Returns nil if no oops found.
	Parse(output []byte) *Report

	// Symbolize symbolizes rep.Report and fills in Maintainers.
	Symbolize(rep *Report) error
}
29
// Report describes a crash extracted from console output by Reporter.Parse.
type Report struct {
	// Title contains a representative description of the first oops.
	Title string
	// Report contains whole oops text.
	Report []byte
	// Output contains whole raw console output as passed to Reporter.Parse.
	Output []byte
	// StartPos/EndPos denote region of output with oops message(s).
	StartPos int
	EndPos   int
	// Suppressed indicates whether the report should not be reported to user.
	Suppressed bool
	// Corrupted indicates whether the report is truncated or corrupted in some other way.
	Corrupted bool
	// CorruptedReason contains reason why the report is marked as corrupted.
	CorruptedReason string
	// Maintainers is list of maintainer emails.
	Maintainers []string
}
49
50// NewReporter creates reporter for the specified OS/Type.
51func NewReporter(cfg *mgrconfig.Config) (Reporter, error) {
52	typ := cfg.TargetOS
53	if cfg.Type == "gvisor" {
54		typ = cfg.Type
55	}
56	ctor := ctors[typ]
57	if ctor == nil {
58		return nil, fmt.Errorf("unknown OS: %v", typ)
59	}
60	ignores, err := compileRegexps(cfg.Ignores)
61	if err != nil {
62		return nil, err
63	}
64	rep, suppressions, err := ctor(cfg.KernelSrc, cfg.KernelObj, ignores)
65	if err != nil {
66		return nil, err
67	}
68	supps, err := compileRegexps(append(suppressions, cfg.Suppressions...))
69	if err != nil {
70		return nil, err
71	}
72	return &reporterWrapper{rep, supps}, nil
73}
74
// ctors maps a reporter type name to its constructor.
// NewReporter indexes it with cfg.TargetOS, or with "gvisor" when cfg.Type is "gvisor".
var ctors = map[string]fn{
	"akaros":  ctorAkaros,
	"linux":   ctorLinux,
	"gvisor":  ctorGvisor,
	"freebsd": ctorFreebsd,
	"netbsd":  ctorNetbsd,
	"fuchsia": ctorFuchsia,
	"windows": ctorStub,
}
84
// fn is the signature of a reporter constructor stored in ctors.
// NewReporter passes cfg.KernelSrc, cfg.KernelObj and the compiled cfg.Ignores;
// the constructor returns the reporter, a list of suppression patterns
// (uncompiled regexps that NewReporter compiles together with cfg.Suppressions)
// and an error.
type fn func(string, string, []*regexp.Regexp) (Reporter, []string, error)
86
// compileRegexps compiles every pattern in list, preserving order.
// On the first pattern that fails to compile it returns a nil slice and
// an error naming the offending pattern.
func compileRegexps(list []string) ([]*regexp.Regexp, error) {
	res := make([]*regexp.Regexp, 0, len(list))
	for _, expr := range list {
		re, err := regexp.Compile(expr)
		if err != nil {
			return nil, fmt.Errorf("failed to compile %q: %v", expr, err)
		}
		res = append(res, re)
	}
	return res, nil
}
98
// reporterWrapper is the Reporter returned by NewReporter.
// It embeds the OS-specific Reporter and overrides Parse to normalize
// the title and to mark reports whose output matches a suppression.
type reporterWrapper struct {
	Reporter
	// suppressions are matched against the whole console output of a report.
	suppressions []*regexp.Regexp
}
103
104func (wrap *reporterWrapper) Parse(output []byte) *Report {
105	rep := wrap.Reporter.Parse(output)
106	if rep == nil {
107		return nil
108	}
109	rep.Title = sanitizeTitle(replaceTable(dynamicTitleReplacement, rep.Title))
110	rep.Suppressed = matchesAny(rep.Output, wrap.suppressions)
111	return rep
112}
113
114func IsSuppressed(reporter Reporter, output []byte) bool {
115	return matchesAny(output, reporter.(*reporterWrapper).suppressions)
116}
117
// replacement is a single rewrite rule applied by replaceTable:
// every match of the regexp is replaced with the replacement template.
type replacement struct {
	match *regexp.Regexp
	// replacement is passed to Regexp.ReplaceAllString, so it may
	// reference capture groups (e.g. "${1}").
	replacement string
}
122
123func replaceTable(replacements []replacement, str string) string {
124	for _, repl := range replacements {
125		str = repl.match.ReplaceAllString(str, repl.replacement)
126	}
127	return str
128}
129
130var dynamicTitleReplacement = []replacement{
131	{
132		// Executor PIDs are not interesting.
133		regexp.MustCompile(`syz-executor[0-9]+((/|:)[0-9]+)?`),
134		"syz-executor",
135	},
136	{
137		// syzkaller binaries are coming from repro.
138		regexp.MustCompile(`syzkaller[0-9]+((/|:)[0-9]+)?`),
139		"syzkaller",
140	},
141	{
142		// Replace that everything looks like an address with "ADDR",
143		// addresses in descriptions can't be good regardless of the oops regexps.
144		regexp.MustCompile(`([^a-zA-Z])(?:0x)?[0-9a-f]{6,}`),
145		"${1}ADDR",
146	},
147	{
148		// Replace that everything looks like a decimal number with "NUM".
149		regexp.MustCompile(`([^a-zA-Z])[0-9]{5,}`),
150		"${1}NUM",
151	},
152	{
153		// Replace that everything looks like a file line number with "LINE".
154		regexp.MustCompile(`(:[0-9]+)+`),
155		":LINE",
156	},
157	{
158		// Replace all raw references to runctions (e.g. "ip6_fragment+0x1052/0x2d80")
159		// with just function name ("ip6_fragment"). Offsets and sizes are not stable.
160		regexp.MustCompile(`([a-zA-Z][a-zA-Z0-9_.]+)\+0x[0-9a-z]+/0x[0-9a-z]+`),
161		"${1}",
162	},
163	{
164		// CPU numbers are not interesting.
165		regexp.MustCompile(`CPU#[0-9]+`),
166		"CPU",
167	},
168}
169
// sanitizeTitle turns a raw title into a single printable ASCII line.
// Only the first maxTitleLen bytes of the input are considered; tabs become
// spaces, other control and non-ASCII bytes are dropped, runs of spaces are
// collapsed into one, and leading/trailing whitespace is trimmed.
func sanitizeTitle(title string) string {
	const maxTitleLen = 120 // Corrupted/intermixed lines can be very long.
	if len(title) > maxTitleLen {
		title = title[:maxTitleLen]
	}
	out := make([]byte, 0, len(title))
	// Starting as "after a space" drops leading spaces right away.
	afterSpace := true
	for _, ch := range []byte(title) {
		if ch == '\t' {
			ch = ' '
		}
		if ch < 0x20 || ch >= 0x7f {
			// Dropped bytes do not affect space collapsing.
			continue
		}
		isSpace := ch == ' '
		if isSpace && afterSpace {
			continue
		}
		out = append(out, ch)
		afterSpace = isSpace
	}
	return strings.TrimSpace(string(out))
}
190
// guilter is an optional interface that a Reporter may implement in addition
// to Reporter; reporterWrapper.extractGuiltyFile forwards to it.
// NOTE(review): presumably the result is the source file blamed for the
// report — confirm against the implementations.
type guilter interface {
	extractGuiltyFile([]byte) string
}
194
195func (wrap reporterWrapper) extractGuiltyFile(report []byte) string {
196	if g, ok := wrap.Reporter.(guilter); ok {
197		return g.extractGuiltyFile(report)
198	}
199	panic("not implemented")
200}
201
// oops describes one class of crash messages.
type oops struct {
	// header is the byte sequence searched for (as a plain substring)
	// in a line of output to detect this kind of oops.
	header []byte
	// formats are tried by extractDescription to build a title.
	formats []oopsFormat
	// A line that contains header but matches any of suppressions
	// is not treated as an oops.
	suppressions []*regexp.Regexp
}
207
// oopsFormat describes how to recognize one concrete message format of an oops
// and how to build a title for it.
//
// If the corrupted field is set, extractDescription marks a report that uses
// this format as corrupted unless another corruption reason was already found.
// NOTE(review): noStackTrace is not read anywhere in this part of the file;
// its meaning must be confirmed against its users.
type oopsFormat struct {
	title *regexp.Regexp
	// If title is matched but report is not, the report is considered corrupted.
	report *regexp.Regexp
	// Format string to create report title.
	// Strings captured by title (or by report if present) are passed as input.
	// If stack is not nil, extracted function name is passed as an additional last argument.
	fmt string
	// If not nil, a function name is extracted from the report and passed to fmt.
	// If not nil but frame extraction fails, the report is considered corrupted.
	stack        *stackFmt
	noStackTrace bool
	corrupted    bool
}
222
// stackFmt describes how to find the guilty stack frame of a report.
type stackFmt struct {
	// parts describe how guilty stack frame must be extracted from the report.
	// parts are matched consecutively potentially capturing frames.
	// parts can be of 3 types:
	//  - non-capturing regexp, matched against report and advances current position
	//  - capturing regexp, same as above, but also yields a frame
	//  - special value parseStackTrace means that a stack trace must be parsed
	//    starting from current position
	parts []*regexp.Regexp
	// If parts2 is present it is tried when parts matching fails.
	parts2 []*regexp.Regexp
	// Skip these functions in stack traces. The strings are joined with "|"
	// and compiled as a single regexp that is matched anywhere in the function
	// name, so regexp metacharacters in them are significant.
	skip []string
}
237
// parseStackTrace is a sentinel for stackFmt.parts/parts2: a part equal to it
// requests stack trace parsing instead of a regexp match. It is never assigned
// here, so it is the nil *regexp.Regexp and is recognized by pointer comparison.
var parseStackTrace *regexp.Regexp
239
// compile expands the {{ADDR}}, {{PC}}, {{FUNC}} and {{SRC}} placeholders in re
// and compiles the result. {{FUNC}} and {{SRC}} each add one capture group.
// It panics if the expanded expression is not a valid regexp.
func compile(re string) *regexp.Regexp {
	macros := [...][2]string{
		{"{{ADDR}}", "0x[0-9a-f]+"},
		{"{{PC}}", "\\[\\<(?:0x)?[0-9a-f]+\\>\\]"},
		{"{{FUNC}}", "([a-zA-Z0-9_]+)(?:\\.|\\+)"},
		{"{{SRC}}", "([a-zA-Z0-9-_/.]+\\.[a-z]+:[0-9]+)"},
	}
	for _, m := range macros {
		re = strings.Replace(re, m[0], m[1], -1)
	}
	return regexp.MustCompile(re)
}
247
248func containsCrash(output []byte, oopses []*oops, ignores []*regexp.Regexp) bool {
249	for pos := 0; pos < len(output); {
250		next := bytes.IndexByte(output[pos:], '\n')
251		if next != -1 {
252			next += pos
253		} else {
254			next = len(output)
255		}
256		for _, oops := range oopses {
257			match := matchOops(output[pos:next], oops, ignores)
258			if match == -1 {
259				continue
260			}
261			return true
262		}
263		pos = next + 1
264	}
265	return false
266}
267
268func matchOops(line []byte, oops *oops, ignores []*regexp.Regexp) int {
269	match := bytes.Index(line, oops.header)
270	if match == -1 {
271		return -1
272	}
273	if matchesAny(line, oops.suppressions) {
274		return -1
275	}
276	if matchesAny(line, ignores) {
277		return -1
278	}
279	return match
280}
281
282func extractDescription(output []byte, oops *oops, params *stackParams) (
283	desc string, corrupted string, format oopsFormat) {
284	startPos := len(output)
285	matchedTitle := false
286	for _, f := range oops.formats {
287		match := f.title.FindSubmatchIndex(output)
288		if match == nil || match[0] > startPos {
289			continue
290		}
291		if match[0] == startPos && desc != "" {
292			continue
293		}
294		if match[0] < startPos {
295			desc = ""
296			format = oopsFormat{}
297			startPos = match[0]
298		}
299		matchedTitle = true
300		if f.report != nil {
301			match = f.report.FindSubmatchIndex(output)
302			if match == nil {
303				continue
304			}
305		}
306		var args []interface{}
307		for i := 2; i < len(match); i += 2 {
308			args = append(args, string(output[match[i]:match[i+1]]))
309		}
310		corrupted = ""
311		if f.stack != nil {
312			frame := ""
313			frame, corrupted = extractStackFrame(params, f.stack, output[match[0]:])
314			if frame == "" {
315				frame = "corrupted"
316				if corrupted == "" {
317					corrupted = "extracted no stack frame"
318				}
319			}
320			args = append(args, frame)
321		}
322		desc = fmt.Sprintf(f.fmt, args...)
323		format = f
324	}
325	if len(desc) == 0 {
326		// If we are here and matchedTitle is set, it means that we've matched
327		// a title of an oops but not full report regexp or stack trace,
328		// which means the report was corrupted.
329		if matchedTitle {
330			corrupted = "matched title but not report regexp"
331		}
332		pos := bytes.Index(output, oops.header)
333		if pos == -1 {
334			return
335		}
336		end := bytes.IndexByte(output[pos:], '\n')
337		if end == -1 {
338			end = len(output)
339		} else {
340			end += pos
341		}
342		desc = string(output[pos:end])
343	}
344	if corrupted == "" && format.corrupted {
345		corrupted = "report format is marked as corrupted"
346	}
347	return
348}
349
// stackParams holds the reporter-wide settings used when extracting
// a guilty frame from a stack trace.
type stackParams struct {
	// stackStartRes matches start of stack traces.
	stackStartRes []*regexp.Regexp
	// frameRes match different formats of lines containing kernel frames (capture function name).
	// The function name is taken from the first capture group.
	frameRes []*regexp.Regexp
	// skipPatterns match functions that must be unconditionally skipped.
	// They are joined with "|" together with stackFmt.skip and compiled as a regexp.
	skipPatterns []string
	// If we looked at any lines that match corruptedLines during report analysis,
	// then the report is marked as corrupted.
	corruptedLines []*regexp.Regexp
}
361
362func extractStackFrame(params *stackParams, stack *stackFmt, output []byte) (string, string) {
363	skip := append([]string{}, params.skipPatterns...)
364	skip = append(skip, stack.skip...)
365	var skipRe *regexp.Regexp
366	if len(skip) != 0 {
367		skipRe = regexp.MustCompile(strings.Join(skip, "|"))
368	}
369	frame, corrupted := extractStackFrameImpl(params, output, skipRe, stack.parts)
370	if frame != "" || len(stack.parts2) == 0 {
371		return frame, corrupted
372	}
373	return extractStackFrameImpl(params, output, skipRe, stack.parts2)
374}
375
// extractStackFrameImpl walks output line by line and applies parts in order.
// A single scanner is shared by all parts, so each part continues from the line
// after the one where the previous part stopped.
//
// It returns the first extracted frame that skipRe (if not nil) does not match,
// or "" if the parts are exhausted. The second result is a non-empty reason if
// any line examined so far matched params.corruptedLines.
//
// NOTE(review): bufio.Scanner stops at a line longer than its buffer limit and
// s.Err() is not checked here, so such a line would silently end the scan —
// confirm this is acceptable for very long console lines.
func extractStackFrameImpl(params *stackParams, output []byte, skipRe *regexp.Regexp,
	parts []*regexp.Regexp) (string, string) {
	corrupted := ""
	s := bufio.NewScanner(bytes.NewReader(output))
nextPart:
	for _, part := range parts {
		if part == parseStackTrace {
			// Stack trace mode: look for frame lines until a frame is accepted
			// or a new stack trace starts.
			for s.Scan() {
				ln := bytes.Trim(s.Bytes(), "\r")
				if corrupted == "" && matchesAny(ln, params.corruptedLines) {
					corrupted = "corrupted line in report (1)"
				}
				if matchesAny(ln, params.stackStartRes) {
					// Start of another stack trace: this part is done.
					continue nextPart
				}
				// Use the first frame format that recognizes the line.
				var match []int
				for _, re := range params.frameRes {
					match = re.FindSubmatchIndex(ln)
					if match != nil {
						break
					}
				}
				if match == nil {
					continue
				}
				// The function name is the first capture group of the frame regexp.
				frame := ln[match[2]:match[3]]
				if skipRe == nil || !skipRe.Match(frame) {
					return string(frame), corrupted
				}
			}
		} else {
			// Regexp mode: advance to the first line that matches part.
			for s.Scan() {
				ln := bytes.Trim(s.Bytes(), "\r")
				if corrupted == "" && matchesAny(ln, params.corruptedLines) {
					corrupted = "corrupted line in report (2)"
				}
				match := part.FindSubmatchIndex(ln)
				if match == nil {
					continue
				}
				// A part with exactly one capture group that participated
				// in the match yields a frame.
				if len(match) == 4 && match[2] != -1 {
					frame := ln[match[2]:match[3]]
					if skipRe == nil || !skipRe.Match(frame) {
						return string(frame), corrupted
					}
				}
				// The part matched (without an acceptable frame): go to the next part.
				break
			}
		}
	}
	return "", corrupted
}
428
429func simpleLineParser(output []byte, oopses []*oops, params *stackParams, ignores []*regexp.Regexp) *Report {
430	rep := &Report{
431		Output: output,
432	}
433	var oops *oops
434	for pos := 0; pos < len(output); {
435		next := bytes.IndexByte(output[pos:], '\n')
436		if next != -1 {
437			next += pos
438		} else {
439			next = len(output)
440		}
441		line := output[pos:next]
442		for _, oops1 := range oopses {
443			match := matchOops(line, oops1, ignores)
444			if match != -1 {
445				oops = oops1
446				rep.StartPos = pos
447				break
448			}
449		}
450		if oops != nil {
451			break
452		}
453		pos = next + 1
454	}
455	if oops == nil {
456		return nil
457	}
458	title, corrupted, _ := extractDescription(output[rep.StartPos:], oops, params)
459	rep.Title = title
460	rep.Report = output[rep.StartPos:]
461	rep.Corrupted = corrupted != ""
462	rep.CorruptedReason = corrupted
463	return rep
464}
465
// matchesAny reports whether line matches at least one regexp in res.
// An empty or nil res never matches.
func matchesAny(line []byte, res []*regexp.Regexp) bool {
	for i := 0; i < len(res); i++ {
		if res[i].Match(line) {
			return true
		}
	}
	return false
}
474
// replace substitutes what for the bytes where[start:end] and returns the
// resulting slice. The work is done in place: the backing array of where is
// reused (and overwritten) whenever it has enough capacity.
func replace(where []byte, start, end int, what []byte) []byte {
	removed := end - start
	tailDst := start + len(what) // where the bytes after end must land
	if grow := len(what) - removed; grow >= 0 {
		// Extend the slice first (the appended bytes are just filler that is
		// overwritten below), then shift the tail to the right.
		where = append(where, what[removed:]...)
		copy(where[tailDst:], where[end:])
	} else {
		// Shift the tail to the left, then drop the now unused bytes.
		copy(where[tailDst:], where[end:])
		where = where[:len(where)+grow]
	}
	copy(where[start:], what)
	return where
}
488
// filenameRe matches a reference to a line in a C source or header file:
// an optional path, a file name ending in ".c" or ".h", a colon and a line number.
var filenameRe = regexp.MustCompile(`[a-zA-Z0-9_\-\./]*[a-zA-Z0-9_\-]+\.(c|h):[0-9]+`)
492