1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
24import itertools
25import os.path
26import re
27
28try:
29    import bpf
30except ImportError:
31    from minijail import bpf
32
33
# A lexical token produced by the tokenizer. Besides the token type and
# its matched text, it carries the source filename, the full line, the
# 1-based line number, and the 0-based column so that error messages can
# point at the exact offending span.
Token = collections.namedtuple(
    'Token',
    ('type', 'value', 'filename', 'line', 'line_number', 'column'))
36
# A regex that can tokenize a Minijail policy file line.
#
# NOTE: order matters. The alternation tries these patterns left to right,
# so multi-character operators must appear before their single-character
# prefixes ('||' before '|', and '&&' before the '&' inside OP), and the
# keyword-like patterns use \b word boundaries so they do not swallow a
# longer identifier that merely starts with the same letters.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    # A trailing backslash: the logical line continues on the next line.
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    # An absolute (or ./-relative) POSIX path, e.g. for @include targets.
    ('PATH', r'(?:\.)?/\S+'),
    # Hex, octal (0o), or decimal, each optionally negated.
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    # Syscall argument references: arg0, arg1, ...
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b|\buser-notify\b'
    ),
    # Also matches syscall group names like read@group ('-' and '@' allowed).
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# One combined alternation; the named group that matched (match.lastgroup)
# identifies the token type.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
72
73
class ParseException(Exception):
    """An exception that is raised when parsing fails."""

    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, *, line='', line_number=1,
                 token=None):
        # When a token is supplied it pinpoints the error exactly; without
        # one, point the caret just past the end of the provided line.
        if token:
            line = token.line
            line_number = token.line_number
            column = token.column
            length = len(token.value)
        else:
            column = len(line)
            length = 1

        # Build a three-part diagnostic: location header, the offending
        # source line, and a caret marker underlining the bad span.
        header = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                      message)
        source_line = '\n    %s' % line
        caret_marker = '\n    %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source_line + caret_marker)
99
100
class ParserState:
    """Stores the state of the Parser to provide better diagnostics."""

    def __init__(self, filename):
        # Name of the file being parsed ("<memory>" for the initial state).
        self._filename = filename
        # Most recently read physical line, stripped of its line ending.
        self._line = ''
        # 1-based number of that line (0 until tokenize() reads a line).
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message."""
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield the list of tokens for each logical line in |lines|.

        A physical line ending in a backslash (CONTINUATION) is folded
        together with the following line(s), so a single yielded list can
        span several physical lines. Whitespace, comments, and the
        continuation marker itself are dropped, and lines that produce no
        tokens are skipped entirely. Raises ParseException (via error())
        on any text that does not match the token specification.
        """
        tokens = []

        for line_number, line in enumerate(lines):
            self._line_number = line_number + 1
            self._line = line.rstrip('\r\n')

            # Offset just past the previous match; any gap between two
            # consecutive matches is text the tokenizer cannot recognize.
            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this logic
                # elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            # Unmatched text at the end of the line is also an error.
            if last_end != len(self._line):
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be free
                # to modify it.
                yield tokens[::]
            tokens.clear()
176
177
# Plain-data result types produced by the parser. The bare string literals
# following each definition act as documentation for doc tools.
Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression."""

Filter = collections.namedtuple('Filter', ['expression', 'action'])
"""The result of parsing a DNF filter expression, with its action.

Since the expression is in Disjunctive Normal Form, it is composed of two levels
of lists, one for disjunctions and the inner one for conjunctions. The elements
of the inner list are Atoms.
"""

Syscall = collections.namedtuple('Syscall', ['name', 'number'])
"""A system call."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ['syscalls', 'filters', 'token'])
"""The result of parsing a filter statement.

Statements have a list of syscalls, and an associated list of filters that will
be evaluated sequentially when any of the syscalls is invoked.
"""

FilterStatement = collections.namedtuple('FilterStatement',
                                         ['syscall', 'frequency', 'filters'])
"""The filter list for a particular syscall.

This is a mapping from one syscall to a list of filters that are evaluated
sequentially. The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple('ParsedPolicy',
                                      ['default_action', 'filter_statements'])
"""The result of parsing a minijail .policy file."""
211
212
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format."""

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None):
        """Initialize the parser for one architecture.

        Args:
            arch: architecture description object providing constants,
                syscalls, syscall_groups, arch_name, and the word-size
                helpers max_unsigned, min_signed, and truncate_word.
            kill_action: the bpf action that the 'kill' keyword (and the
                initial default action) maps to.
            include_depth_limit: maximum nesting depth of @include files.
            override_default_action: optional action that, when not None,
                takes precedence over any @default statement in the policy.
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch

    @property
    def _parser_state(self):
        """Return the ParserState of the file currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Parse one constant token and return its unsigned integer value."""
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one (possibly complemented or parenthesized) constant.

        Consumes the tokens it uses from the front of |tokens|.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                # '~~x' is a no-op, so it is most likely a mistake.
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the constant itself.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # The while/else branch runs only when the loop condition became
            # false without a break: either |tokens| was empty on entry, or a
            # trailing '|' consumed the last token. Both mean a missing
            # constant.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse one 'argN op value' comparison and return an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # Strip the 'arg' prefix to recover the numeric argument index.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse a conjunction of atoms and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            # Only reached when no atom was parsed at all (empty input or a
            # trailing '&&').
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                   ;
    def parse_argument_expression(self, tokens):
        """Parse a argument expression in Disjunctive Normal Form.

        Since BPF disallows back jumps, we build the basic blocks in reverse
        order so that all the jump targets are known by the time we need to
        reference them.
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                | 'user-notify'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action following @default and return a bpf action.

        Only non-permissive actions are accepted here; 'allow', 'trace',
        and 'log' are rejected as default actions.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        if action_token.value == 'user-notify':
            return bpf.UserNotify()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse a filter action and return the corresponding bpf action."""
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            # The legacy spelling '1' is an alias for 'allow'.
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter (an action, or an expression with an action)."""
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                # An expression with no explicit action implies 'allow'.
                action = bpf.Allow()
            return Filter(argument_expression, action)
        else:
            return Filter(None, self.parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse 'key=value[,value...]' and return (key, [values])."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata section into a key -> values dict."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall name or group and return an iterable of Syscalls.

        Returns an empty tuple when an 'arch' metadata entry restricts the
        descriptor to architectures other than the current one.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
            syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            return (Syscall(name, self._arch.syscalls[name])
                    for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when every syscall in the statement was filtered out
        by 'arch' metadata (i.e. applies only to other architectures).
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an @include statement and return the included statements.

        The include path is resolved relative to the directory of the file
        currently being parsed. Recurses into _parse_policy_file, with the
        nesting depth bounded by include_depth_limit.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file and return a Syscall -> count mapping.

        Each line has the form 'syscall-descriptor: number'; counts for the
        same syscall accumulate.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            # Always restore the previous parser state, even on error.
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                      ;
    def _parse_frequency_statement(self, tokens):
        """Parse an @frequency statement and return its frequency mapping."""
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse an @default statement and return the new default action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file and return its ParsedFilterStatements.

        @include and @frequency statements are processed inline; @default
        updates self._default_action as a side effect.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        for syscall_number, frequency in self._parse_frequency_statement(
                                tokens).items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    # Each statement must consume its logical line entirely.
                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a file and return the list of FilterStatements."""
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = [x for x in self._parse_policy_file(filename)]
        except RecursionError:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list, remembering
        # the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    # First time we see this syscall: unknown syscalls get a
                    # frequency of 1.
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Filters with expression=None are unconditional; only the very
            # last filter of a syscall may be unconditional.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                previous_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix)]
                current_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            # No unconditional action yet: terminate the filter list with the
            # policy's default action.
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)
786