1// Copyright 2015/2016 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package prog
5
6import (
7	"fmt"
8	"math/rand"
9	"sort"
10)
11
12// Calulation of call-to-call priorities.
13// For a given pair of calls X and Y, the priority is our guess as to whether
14// additional of call Y into a program containing call X is likely to give
15// new coverage or not.
16// The current algorithm has two components: static and dynamic.
17// The static component is based on analysis of argument types. For example,
18// if call X and call Y both accept fd[sock], then they are more likely to give
19// new coverage together.
20// The dynamic component is based on frequency of occurrence of a particular
21// pair of syscalls in a single program in corpus. For example, if socket and
22// connect frequently occur in programs together, we give higher priority to
23// this pair of syscalls.
24// Note: the current implementation is very basic, there is no theory behind any
25// constants.
26
27func (target *Target) CalculatePriorities(corpus []*Prog) [][]float32 {
28	static := target.calcStaticPriorities()
29	dynamic := target.calcDynamicPrio(corpus)
30	for i, prios := range static {
31		for j, p := range prios {
32			dynamic[i][j] *= p
33		}
34	}
35	return dynamic
36}
37
38func (target *Target) calcStaticPriorities() [][]float32 {
39	uses := target.calcResourceUsage()
40	prios := make([][]float32, len(target.Syscalls))
41	for i := range prios {
42		prios[i] = make([]float32, len(target.Syscalls))
43	}
44	for _, calls := range uses {
45		for c0, w0 := range calls {
46			for c1, w1 := range calls {
47				if c0 == c1 {
48					// Self-priority is assigned below.
49					continue
50				}
51				prios[c0][c1] += w0 * w1
52			}
53		}
54	}
55
56	// Self-priority (call wrt itself) is assigned to the maximum priority
57	// this call has wrt other calls. This way the priority is high, but not too high.
58	for c0, pp := range prios {
59		var max float32
60		for _, p := range pp {
61			if max < p {
62				max = p
63			}
64		}
65		pp[c0] = max
66	}
67	normalizePrio(prios)
68	return prios
69}
70
71func (target *Target) calcResourceUsage() map[string]map[int]float32 {
72	uses := make(map[string]map[int]float32)
73	for _, c := range target.Syscalls {
74		ForeachType(c, func(t Type) {
75			switch a := t.(type) {
76			case *ResourceType:
77				if a.Desc.Name == "pid" || a.Desc.Name == "uid" || a.Desc.Name == "gid" {
78					// Pid/uid/gid usually play auxiliary role,
79					// but massively happen in some structs.
80					noteUsage(uses, c, 0.1, "res%v", a.Desc.Name)
81				} else {
82					str := "res"
83					for i, k := range a.Desc.Kind {
84						str += "-" + k
85						w := 1.0
86						if i < len(a.Desc.Kind)-1 {
87							w = 0.2
88						}
89						noteUsage(uses, c, float32(w), str)
90					}
91				}
92			case *PtrType:
93				if _, ok := a.Type.(*StructType); ok {
94					noteUsage(uses, c, 1.0, "ptrto-%v", a.Type.Name())
95				}
96				if _, ok := a.Type.(*UnionType); ok {
97					noteUsage(uses, c, 1.0, "ptrto-%v", a.Type.Name())
98				}
99				if arr, ok := a.Type.(*ArrayType); ok {
100					noteUsage(uses, c, 1.0, "ptrto-%v", arr.Type.Name())
101				}
102			case *BufferType:
103				switch a.Kind {
104				case BufferBlobRand, BufferBlobRange, BufferText:
105				case BufferString:
106					if a.SubKind != "" {
107						noteUsage(uses, c, 0.2, fmt.Sprintf("str-%v", a.SubKind))
108					}
109				case BufferFilename:
110					noteUsage(uses, c, 1.0, "filename")
111				default:
112					panic("unknown buffer kind")
113				}
114			case *VmaType:
115				noteUsage(uses, c, 0.5, "vma")
116			case *IntType:
117				switch a.Kind {
118				case IntPlain, IntFileoff, IntRange:
119				default:
120					panic("unknown int kind")
121				}
122			}
123		})
124	}
125	return uses
126}
127
128func noteUsage(uses map[string]map[int]float32, c *Syscall, weight float32, str string, args ...interface{}) {
129	id := fmt.Sprintf(str, args...)
130	if uses[id] == nil {
131		uses[id] = make(map[int]float32)
132	}
133	old := uses[id][c.ID]
134	if weight > old {
135		uses[id][c.ID] = weight
136	}
137}
138
139func (target *Target) calcDynamicPrio(corpus []*Prog) [][]float32 {
140	prios := make([][]float32, len(target.Syscalls))
141	for i := range prios {
142		prios[i] = make([]float32, len(target.Syscalls))
143	}
144	for _, p := range corpus {
145		for _, c0 := range p.Calls {
146			for _, c1 := range p.Calls {
147				id0 := c0.Meta.ID
148				id1 := c1.Meta.ID
149				prios[id0][id1] += 1.0
150			}
151		}
152	}
153	normalizePrio(prios)
154	return prios
155}
156
157// normalizePrio assigns some minimal priorities to calls with zero priority,
158// and then normalizes priorities to 0.1..1 range.
159func normalizePrio(prios [][]float32) {
160	for _, prio := range prios {
161		max := float32(0)
162		min := float32(1e10)
163		nzero := 0
164		for _, p := range prio {
165			if max < p {
166				max = p
167			}
168			if p != 0 && min > p {
169				min = p
170			}
171			if p == 0 {
172				nzero++
173			}
174		}
175		if nzero != 0 {
176			min /= 2 * float32(nzero)
177		}
178		for i, p := range prio {
179			if max == 0 {
180				prio[i] = 1
181				continue
182			}
183			if p == 0 {
184				p = min
185			}
186			p = (p-min)/(max-min)*0.9 + 0.1
187			if p > 1 {
188				p = 1
189			}
190			prio[i] = p
191		}
192	}
193}
194
195// ChooseTable allows to do a weighted choice of a syscall for a given syscall
196// based on call-to-call priorities and a set of enabled syscalls.
197type ChoiceTable struct {
198	target       *Target
199	run          [][]int
200	enabledCalls []*Syscall
201	enabled      map[*Syscall]bool
202}
203
204func (target *Target) BuildChoiceTable(prios [][]float32, enabled map[*Syscall]bool) *ChoiceTable {
205	if enabled == nil {
206		enabled = make(map[*Syscall]bool)
207		for _, c := range target.Syscalls {
208			enabled[c] = true
209		}
210	}
211	var enabledCalls []*Syscall
212	for c := range enabled {
213		enabledCalls = append(enabledCalls, c)
214	}
215	run := make([][]int, len(target.Syscalls))
216	for i := range run {
217		if !enabled[target.Syscalls[i]] {
218			continue
219		}
220		run[i] = make([]int, len(target.Syscalls))
221		sum := 0
222		for j := range run[i] {
223			if enabled[target.Syscalls[j]] {
224				w := 1
225				if prios != nil {
226					w = int(prios[i][j] * 1000)
227				}
228				sum += w
229			}
230			run[i][j] = sum
231		}
232	}
233	return &ChoiceTable{target, run, enabledCalls, enabled}
234}
235
236func (ct *ChoiceTable) Choose(r *rand.Rand, call int) int {
237	if call < 0 {
238		return ct.enabledCalls[r.Intn(len(ct.enabledCalls))].ID
239	}
240	run := ct.run[call]
241	if run == nil {
242		return ct.enabledCalls[r.Intn(len(ct.enabledCalls))].ID
243	}
244	for {
245		x := r.Intn(run[len(run)-1]) + 1
246		i := sort.SearchInts(run, x)
247		if ct.enabled[ct.target.Syscalls[i]] {
248			return i
249		}
250	}
251}
252