1// Copyright 2017 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package ast
5
6import (
7	"fmt"
8	"os"
9	"strconv"
10)
11
// token identifies the lexical class of an item produced by the scanner.
type token int

// Token kinds. The numbering starts at tokIllegal, which is deliberately the
// zero value: any byte without an entry in the punctuation table maps to it.
const (
	// Literals, identifiers and keywords.
	tokIllegal = token(iota)
	tokComment
	tokIdent
	tokInclude
	tokIncdir
	tokDefine
	tokResource
	tokString
	tokCExpr
	tokInt

	// Single-character punctuation, see the punctuation table.
	tokNewLine
	tokLParen
	tokRParen
	tokLBrack
	tokRBrack
	tokLBrace
	tokRBrace
	tokEq
	tokComma
	tokColon

	// End of input.
	tokEOF
)
39
40var punctuation = [256]token{
41	'\n': tokNewLine,
42	'(':  tokLParen,
43	')':  tokRParen,
44	'[':  tokLBrack,
45	']':  tokRBrack,
46	'{':  tokLBrace,
47	'}':  tokRBrace,
48	'=':  tokEq,
49	',':  tokComma,
50	':':  tokColon,
51}
52
53var tok2str = [...]string{
54	tokIllegal:  "ILLEGAL",
55	tokComment:  "comment",
56	tokIdent:    "identifier",
57	tokInclude:  "include",
58	tokIncdir:   "incdir",
59	tokDefine:   "define",
60	tokResource: "resource",
61	tokString:   "string",
62	tokCExpr:    "CEXPR",
63	tokInt:      "int",
64	tokNewLine:  "NEWLINE",
65	tokEOF:      "EOF",
66}
67
68func init() {
69	for ch, tok := range punctuation {
70		if tok == tokIllegal {
71			continue
72		}
73		tok2str[tok] = fmt.Sprintf("%q", ch)
74	}
75}
76
77var keywords = map[string]token{
78	"include":  tokInclude,
79	"incdir":   tokIncdir,
80	"define":   tokDefine,
81	"resource": tokResource,
82}
83
84func (tok token) String() string {
85	return tok2str[tok]
86}
87
88type scanner struct {
89	data         []byte
90	filename     string
91	errorHandler ErrorHandler
92
93	ch   byte
94	off  int
95	line int
96	col  int
97
98	prev1 token
99	prev2 token
100
101	errors int
102}
103
104func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner {
105	if errorHandler == nil {
106		errorHandler = LoggingHandler
107	}
108	s := &scanner{
109		data:         data,
110		filename:     filename,
111		errorHandler: errorHandler,
112		off:          -1,
113	}
114	s.next()
115	return s
116}
117
118type ErrorHandler func(pos Pos, msg string)
119
120func LoggingHandler(pos Pos, msg string) {
121	fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg)
122}
123
124func (pos Pos) String() string {
125	return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col)
126}
127
128func (s *scanner) Scan() (tok token, lit string, pos Pos) {
129	s.skipWhitespace()
130	pos = s.pos()
131	switch {
132	case s.ch == 0:
133		tok = tokEOF
134		s.next()
135	case s.ch == '`':
136		tok = tokCExpr
137		lit = s.scanCExpr(pos)
138	case s.prev2 == tokDefine && s.prev1 == tokIdent:
139		// Note: the old form for C expressions, not really lexable.
140		// TODO(dvyukov): get rid of this eventually.
141		tok = tokCExpr
142		for ; s.ch != '\n'; s.next() {
143		}
144		lit = string(s.data[pos.Off:s.off])
145	case s.ch == '#':
146		tok = tokComment
147		for s.next(); s.ch != '\n'; s.next() {
148		}
149		lit = string(s.data[pos.Off+1 : s.off])
150	case s.ch == '"' || s.ch == '<':
151		tok = tokString
152		lit = s.scanStr(pos)
153	case s.ch >= '0' && s.ch <= '9' || s.ch == '-':
154		tok = tokInt
155		lit = s.scanInt(pos)
156	case s.ch == '\'':
157		tok = tokInt
158		lit = s.scanChar(pos)
159	case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z':
160		tok, lit = s.scanIdent(pos)
161	default:
162		tok = punctuation[s.ch]
163		if tok == tokIllegal {
164			s.Error(pos, "illegal character %#U", s.ch)
165		}
166		s.next()
167	}
168	s.prev2 = s.prev1
169	s.prev1 = tok
170	return
171}
172
173func (s *scanner) scanCExpr(pos Pos) string {
174	for s.next(); s.ch != '`' && s.ch != '\n'; s.next() {
175	}
176	if s.ch == '\n' {
177		s.Error(pos, "C expression is not terminated")
178		return ""
179	}
180	lit := string(s.data[pos.Off+1 : s.off])
181	s.next()
182	return lit
183}
184
185func (s *scanner) scanStr(pos Pos) string {
186	// TODO(dvyukov): get rid of <...> strings, that's only includes
187	closing := byte('"')
188	if s.ch == '<' {
189		closing = '>'
190	}
191	for s.next(); s.ch != closing; s.next() {
192		if s.ch == 0 || s.ch == '\n' {
193			s.Error(pos, "string literal is not terminated")
194			return ""
195		}
196	}
197	lit := string(s.data[pos.Off+1 : s.off])
198	for i := 0; i < len(lit); i++ {
199		if lit[i] < 0x20 || lit[i] >= 0x80 {
200			pos1 := pos
201			pos1.Col += i + 1
202			pos1.Off += i + 1
203			s.Error(pos1, "illegal character %#U in string literal", lit[i])
204			break
205		}
206	}
207	s.next()
208	return lit
209}
210
211func (s *scanner) scanInt(pos Pos) string {
212	for s.ch >= '0' && s.ch <= '9' ||
213		s.ch >= 'a' && s.ch <= 'f' ||
214		s.ch >= 'A' && s.ch <= 'F' ||
215		s.ch == 'x' || s.ch == '-' {
216		s.next()
217	}
218	lit := string(s.data[pos.Off:s.off])
219	if _, err := strconv.ParseUint(lit, 10, 64); err == nil {
220		return lit
221	}
222	if len(lit) > 1 && lit[0] == '-' {
223		if _, err := strconv.ParseInt(lit, 10, 64); err == nil {
224			return lit
225		}
226	}
227	if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' {
228		if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil {
229			return lit
230		}
231	}
232	s.Error(pos, fmt.Sprintf("bad integer %q", lit))
233	return "0"
234}
235
236func (s *scanner) scanChar(pos Pos) string {
237	s.next()
238	s.next()
239	if s.ch != '\'' {
240		s.Error(pos, "char literal is not terminated")
241		return "0"
242	}
243	s.next()
244	return string(s.data[pos.Off : pos.Off+3])
245}
246
247func (s *scanner) scanIdent(pos Pos) (tok token, lit string) {
248	tok = tokIdent
249	for s.ch == '_' || s.ch == '$' ||
250		s.ch >= 'a' && s.ch <= 'z' ||
251		s.ch >= 'A' && s.ch <= 'Z' ||
252		s.ch >= '0' && s.ch <= '9' {
253		s.next()
254	}
255	lit = string(s.data[pos.Off:s.off])
256	if key, ok := keywords[lit]; ok {
257		tok = key
258	}
259	return
260}
261
262func (s *scanner) Error(pos Pos, msg string, args ...interface{}) {
263	s.errors++
264	s.errorHandler(pos, fmt.Sprintf(msg, args...))
265}
266
267func (s *scanner) Ok() bool {
268	return s.errors == 0
269}
270
271func (s *scanner) next() {
272	s.off++
273	for s.off < len(s.data) && s.data[s.off] == '\r' {
274		s.off++
275	}
276	if s.off == len(s.data) {
277		// Always emit NEWLINE before EOF.
278		// Makes lots of things simpler as we always
279		// want to treat EOF as NEWLINE as well.
280		s.ch = '\n'
281		s.off++
282		return
283	}
284	if s.off > len(s.data) {
285		s.ch = 0
286		return
287	}
288	if s.off == 0 || s.data[s.off-1] == '\n' {
289		s.line++
290		s.col = 0
291	}
292	s.ch = s.data[s.off]
293	s.col++
294	if s.ch == 0 {
295		s.Error(s.pos(), "illegal character \\x00")
296	}
297}
298
299func (s *scanner) skipWhitespace() {
300	for s.ch == ' ' || s.ch == '\t' {
301		s.next()
302	}
303}
304
305func (s *scanner) pos() Pos {
306	return Pos{
307		File: s.filename,
308		Off:  s.off,
309		Line: s.line,
310		Col:  s.col,
311	}
312}
313