1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
24import itertools
25import os.path
26import re
27
28import bpf
29
# A lexical token, together with the location it was read from. The typename
# matches the module-level binding so that repr() and pickling work as expected.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# The alternatives are tried in order, so keyword-like patterns come before
# IDENTIFIER. They are anchored with \b so that they only match whole words:
# otherwise an identifier that merely starts with a keyword (for instance the
# syscall `inotify_init`, which starts with the operator `in`) would be split
# into a keyword token followed by the rest of the name.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
65
66
class ParseException(Exception):
    """Error raised when a policy file cannot be parsed.

    The exception text has three lines: a `file(line:column): message` header
    (the column is 1-based), the offending source line, and a row of carets
    underlining the location of the problem.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        if token:
            # A token knows exactly where it came from, so it takes precedence
            # over the `line` and `line_number` arguments.
            line, line_number = token.line, token.line_number
            column, length = token.column, len(token.value)
        else:
            # Without a token, point just past the end of the line.
            column, length = len(line), 1

        header = '%s(%d:%d): %s' % (filename, line_number, column + 1, message)
        source = '\n    %s' % line
        marker = '\n    %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source + marker)
92
93
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    Tracks the name of the file being processed, the text of the line that is
    currently being tokenized and its 1-based line number, so that errors can
    point at the place they originated from.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception points at `token` when one is given, and at the current
        line otherwise. This method never returns.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def _invalid_token(self, start, end):
        """Raise a ParseException for the untokenizable text in [start, end)."""
        self.error(
            'invalid token',
            token=Token('INVALID', self._line[start:end], self.filename,
                        self._line, self._line_number, start))

    def tokenize(self, lines):
        """Yield the list of tokens of each logical line in `lines`.

        A physical line that ends with a backslash is joined with the line
        that follows it. Whitespace, comments and the continuation marker are
        never emitted, and logical lines without any tokens are skipped. Every
        yielded list is a fresh copy that the caller is free to modify.

        Raises a ParseException if part of a line cannot be tokenized.
        """
        tokens = []

        for line_number, line in enumerate(lines, 1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for match in _TOKEN_RE.finditer(self._line):
                if match.start() != last_end:
                    # finditer() silently skips text that no pattern matches,
                    # so a gap between two matches is an invalid token.
                    self._invalid_token(last_end, match.start())
                last_end = match.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if match.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(match.lastgroup, match.group(), self.filename,
                          self._line, self._line_number, match.start()))
            if last_end != len(self._line):
                self._invalid_token(last_end, len(self._line))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Emit what was gathered
            # instead of silently dropping the final statement.
            yield tokens[::]
169
170
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))
"""One comparison of a syscall argument against a value."""

Filter = collections.namedtuple('Filter', ('expression', 'action'))
"""A parsed filter: a DNF argument expression plus the action it triggers.

The expression is a list of clauses that are OR-ed together; every clause is
in turn a list of Atoms that are AND-ed together. The expression is None for
a filter that applies its action unconditionally.
"""

Syscall = collections.namedtuple('Syscall', ('name', 'number'))
"""A system call, identified by both its name and its number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters'))
"""What a single filter statement parses to.

It holds the syscalls named by the statement and the list of filters that are
evaluated, in order, whenever one of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""All the filters that apply to one particular syscall.

The filters are evaluated sequentially and the last one is always an
unconditional action. The frequency is the value collected for the syscall
from @frequency files.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""The outcome of parsing a whole minijail .policy file."""
204
205
206# pylint: disable=too-few-public-methods
207class PolicyParser:
208    """A parser for the Minijail seccomp policy file format."""
209
210    def __init__(self,
211                 arch,
212                 *,
213                 kill_action,
214                 include_depth_limit=10,
215                 override_default_action=None):
216        self._parser_states = [ParserState("<memory>")]
217        self._kill_action = kill_action
218        self._include_depth_limit = include_depth_limit
219        self._default_action = self._kill_action
220        self._override_default_action = override_default_action
221        self._frequency_mapping = collections.defaultdict(int)
222        self._arch = arch
223
224    @property
225    def _parser_state(self):
226        return self._parser_states[-1]
227
228    # single-constant = identifier
229    #                 | numeric-constant
230    #                 ;
231    def _parse_single_constant(self, token):
232        if token.type == 'IDENTIFIER':
233            if token.value not in self._arch.constants:
234                self._parser_state.error('invalid constant', token=token)
235            single_constant = self._arch.constants[token.value]
236        elif token.type == 'NUMERIC_CONSTANT':
237            try:
238                single_constant = int(token.value, base=0)
239            except ValueError:
240                self._parser_state.error('invalid constant', token=token)
241        else:
242            self._parser_state.error('invalid constant', token=token)
243        if single_constant > self._arch.max_unsigned:
244            self._parser_state.error('unsigned overflow', token=token)
245        elif single_constant < self._arch.min_signed:
246            self._parser_state.error('signed underflow', token=token)
247        elif single_constant < 0:
248            # This converts the constant to an unsigned representation of the
249            # same value, since BPF only uses unsigned values.
250            single_constant = self._arch.truncate_word(single_constant)
251        return single_constant
252
253    # constant = [ '~' ] , '(' , value , ')'
254    #          | [ '~' ] , single-constant
255    #          ;
256    def _parse_constant(self, tokens):
257        negate = False
258        if tokens[0].type == 'BITWISE_COMPLEMENT':
259            negate = True
260            tokens.pop(0)
261            if not tokens:
262                self._parser_state.error('empty complement')
263            if tokens[0].type == 'BITWISE_COMPLEMENT':
264                self._parser_state.error(
265                    'invalid double complement', token=tokens[0])
266        if tokens[0].type == 'LPAREN':
267            last_open_paren = tokens.pop(0)
268            single_value = self.parse_value(tokens)
269            if not tokens or tokens[0].type != 'RPAREN':
270                self._parser_state.error(
271                    'unclosed parenthesis', token=last_open_paren)
272        else:
273            single_value = self._parse_single_constant(tokens[0])
274        tokens.pop(0)
275        if negate:
276            single_value = self._arch.truncate_word(~single_value)
277        return single_value
278
279    # value = constant , [ { '|' , constant } ]
280    #       ;
281    def parse_value(self, tokens):
282        """Parse constants separated bitwise OR operator |.
283
284        Constants can be:
285
286        - A number that can be parsed with int(..., base=0)
287        - A named constant expression.
288        - A parenthesized, valid constant expression.
289        - A valid constant expression prefixed with the unary bitwise
290          complement operator ~.
291        - A series of valid constant expressions separated by bitwise
292          OR operator |.
293
294        If there is an error parsing any of the constants, the whole process
295        fails.
296        """
297
298        value = 0
299        while tokens:
300            value |= self._parse_constant(tokens)
301            if not tokens or tokens[0].type != 'BITWISE_OR':
302                break
303            tokens.pop(0)
304        else:
305            self._parser_state.error('empty constant')
306        return value
307
308    # atom = argument , op , value
309    #      ;
310    def _parse_atom(self, tokens):
311        if not tokens:
312            self._parser_state.error('missing argument')
313        argument = tokens.pop(0)
314        if argument.type != 'ARGUMENT':
315            self._parser_state.error('invalid argument', token=argument)
316
317        if not tokens:
318            self._parser_state.error('missing operator')
319        operator = tokens.pop(0)
320        if operator.type != 'OP':
321            self._parser_state.error('invalid operator', token=operator)
322
323        value = self.parse_value(tokens)
324        argument_index = int(argument.value[3:])
325        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
326            self._parser_state.error('invalid argument', token=argument)
327        return Atom(argument_index, operator.value, value)
328
329    # clause = atom , [ { '&&' , atom } ]
330    #        ;
331    def _parse_clause(self, tokens):
332        atoms = []
333        while tokens:
334            atoms.append(self._parse_atom(tokens))
335            if not tokens or tokens[0].type != 'AND':
336                break
337            tokens.pop(0)
338        else:
339            self._parser_state.error('empty clause')
340        return atoms
341
342    # argument-expression = clause , [ { '||' , clause } ]
343    #                   ;
344    def parse_argument_expression(self, tokens):
345        """Parse a argument expression in Disjunctive Normal Form.
346
347        Since BPF disallows back jumps, we build the basic blocks in reverse
348        order so that all the jump targets are known by the time we need to
349        reference them.
350        """
351
352        clauses = []
353        while tokens:
354            clauses.append(self._parse_clause(tokens))
355            if not tokens or tokens[0].type != 'OR':
356                break
357            tokens.pop(0)
358        else:
359            self._parser_state.error('empty argument expression')
360        return clauses
361
362    # default-action = 'kill-process'
363    #                | 'kill-thread'
364    #                | 'kill'
365    #                | 'trap'
366    #                ;
367    def _parse_default_action(self, tokens):
368        if not tokens:
369            self._parser_state.error('missing default action')
370        action_token = tokens.pop(0)
371        if action_token.type != 'ACTION':
372            return self._parser_state.error(
373                'invalid default action', token=action_token)
374        if action_token.value == 'kill-process':
375            return bpf.KillProcess()
376        if action_token.value == 'kill-thread':
377            return bpf.KillThread()
378        if action_token.value == 'kill':
379            return self._kill_action
380        if action_token.value == 'trap':
381            return bpf.Trap()
382        return self._parser_state.error(
383            'invalid permissive default action', token=action_token)
384
385    # action = 'allow' | '1'
386    #        | 'kill-process'
387    #        | 'kill-thread'
388    #        | 'kill'
389    #        | 'trap'
390    #        | 'trace'
391    #        | 'log'
392    #        | 'return' , single-constant
393    #        ;
394    def parse_action(self, tokens):
395        if not tokens:
396            self._parser_state.error('missing action')
397        action_token = tokens.pop(0)
398        if action_token.type == 'ACTION':
399            if action_token.value == 'allow':
400                return bpf.Allow()
401            if action_token.value == 'kill':
402                return self._kill_action
403            if action_token.value == 'kill-process':
404                return bpf.KillProcess()
405            if action_token.value == 'kill-thread':
406                return bpf.KillThread()
407            if action_token.value == 'trap':
408                return bpf.Trap()
409            if action_token.value == 'trace':
410                return bpf.Trace()
411            if action_token.value == 'log':
412                return bpf.Log()
413        elif action_token.type == 'NUMERIC_CONSTANT':
414            constant = self._parse_single_constant(action_token)
415            if constant == 1:
416                return bpf.Allow()
417        elif action_token.type == 'RETURN':
418            if not tokens:
419                self._parser_state.error('missing return value')
420            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
421        return self._parser_state.error('invalid action', token=action_token)
422
423    # single-filter = action
424    #               | argument-expression , [ ';' , action ]
425    #               ;
426    def _parse_single_filter(self, tokens):
427        if not tokens:
428            self._parser_state.error('missing filter')
429        if tokens[0].type == 'ARGUMENT':
430            # Only argument expressions can start with an ARGUMENT token.
431            argument_expression = self.parse_argument_expression(tokens)
432            if tokens and tokens[0].type == 'SEMICOLON':
433                tokens.pop(0)
434                action = self.parse_action(tokens)
435            else:
436                action = bpf.Allow()
437            return Filter(argument_expression, action)
438        else:
439            return Filter(None, self.parse_action(tokens))
440
441    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
442    #        | single-filter
443    #        ;
444    def parse_filter(self, tokens):
445        """Parse a filter and return a list of Filter objects."""
446        if not tokens:
447            self._parser_state.error('missing filter')
448        filters = []
449        if tokens[0].type == 'LBRACE':
450            opening_brace = tokens.pop(0)
451            while tokens:
452                filters.append(self._parse_single_filter(tokens))
453                if not tokens or tokens[0].type != 'COMMA':
454                    break
455                tokens.pop(0)
456            if not tokens or tokens[0].type != 'RBRACE':
457                self._parser_state.error('unclosed brace', token=opening_brace)
458            tokens.pop(0)
459        else:
460            filters.append(self._parse_single_filter(tokens))
461        return filters
462
463    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
464    #                ;
465    def _parse_key_value_pair(self, tokens):
466        if not tokens:
467            self._parser_state.error('missing key')
468        key = tokens.pop(0)
469        if key.type != 'IDENTIFIER':
470            self._parser_state.error('invalid key', token=key)
471        if not tokens:
472            self._parser_state.error('missing equal')
473        if tokens[0].type != 'EQUAL':
474            self._parser_state.error('invalid equal', token=tokens[0])
475        tokens.pop(0)
476        value_list = []
477        while tokens:
478            value = tokens.pop(0)
479            if value.type != 'IDENTIFIER':
480                self._parser_state.error('invalid value', token=value)
481            value_list.append(value.value)
482            if not tokens or tokens[0].type != 'COMMA':
483                break
484            tokens.pop(0)
485        else:
486            self._parser_state.error('empty value')
487        return (key.value, value_list)
488
489    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
490    #          ;
491    def _parse_metadata(self, tokens):
492        if not tokens:
493            self._parser_state.error('missing opening bracket')
494        opening_bracket = tokens.pop(0)
495        if opening_bracket.type != 'LBRACKET':
496            self._parser_state.error(
497                'invalid opening bracket', token=opening_bracket)
498        metadata = {}
499        while tokens:
500            first_token = tokens[0]
501            key, value = self._parse_key_value_pair(tokens)
502            if key in metadata:
503                self._parser_state.error(
504                    'duplicate metadata key: "%s"' % key, token=first_token)
505            metadata[key] = value
506            if not tokens or tokens[0].type != 'SEMICOLON':
507                break
508            tokens.pop(0)
509        if not tokens or tokens[0].type != 'RBRACKET':
510            self._parser_state.error('unclosed bracket', token=opening_bracket)
511        tokens.pop(0)
512        return metadata
513
514    # syscall-descriptor = syscall-name , [ metadata ]
515    #                    | syscall-group-name , [ metadata ]
516    #                    ;
517    def _parse_syscall_descriptor(self, tokens):
518        if not tokens:
519            self._parser_state.error('missing syscall descriptor')
520        syscall_descriptor = tokens.pop(0)
521        if syscall_descriptor.type != 'IDENTIFIER':
522            self._parser_state.error(
523                'invalid syscall descriptor', token=syscall_descriptor)
524        if tokens and tokens[0].type == 'LBRACKET':
525            metadata = self._parse_metadata(tokens)
526            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
527                return ()
528        if '@' in syscall_descriptor.value:
529            # This is a syscall group.
530            subtokens = syscall_descriptor.value.split('@')
531            if len(subtokens) != 2:
532                self._parser_state.error(
533                    'invalid syscall group name', token=syscall_descriptor)
534            syscall_group_name, syscall_namespace_name = subtokens
535            if syscall_namespace_name not in self._arch.syscall_groups:
536                self._parser_state.error(
537                    'nonexistent syscall group namespace',
538                    token=syscall_descriptor)
539            syscall_namespace = self._arch.syscall_groups[
540                syscall_namespace_name]
541            if syscall_group_name not in syscall_namespace:
542                self._parser_state.error(
543                    'nonexistent syscall group', token=syscall_descriptor)
544            return (Syscall(name, self._arch.syscalls[name])
545                    for name in syscall_namespace[syscall_group_name])
546        if syscall_descriptor.value not in self._arch.syscalls:
547            self._parser_state.error(
548                'nonexistent syscall', token=syscall_descriptor)
549        return (Syscall(syscall_descriptor.value,
550                        self._arch.syscalls[syscall_descriptor.value]), )
551
552    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
553    #                       ':' , filter
554    #                  | syscall-descriptor , ':' , filter
555    #                  ;
556    def parse_filter_statement(self, tokens):
557        """Parse a filter statement and return a ParsedFilterStatement."""
558        if not tokens:
559            self._parser_state.error('empty filter statement')
560        syscall_descriptors = []
561        if tokens[0].type == 'LBRACE':
562            opening_brace = tokens.pop(0)
563            while tokens:
564                syscall_descriptors.extend(
565                    self._parse_syscall_descriptor(tokens))
566                if not tokens or tokens[0].type != 'COMMA':
567                    break
568                tokens.pop(0)
569            if not tokens or tokens[0].type != 'RBRACE':
570                self._parser_state.error('unclosed brace', token=opening_brace)
571            tokens.pop(0)
572        else:
573            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
574        if not tokens:
575            self._parser_state.error('missing colon')
576        if tokens[0].type != 'COLON':
577            self._parser_state.error('invalid colon', token=tokens[0])
578        tokens.pop(0)
579        parsed_filter = self.parse_filter(tokens)
580        if not syscall_descriptors:
581            return None
582        return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)
583
584    # include-statement = '@include' , posix-path
585    #                   ;
586    def _parse_include_statement(self, tokens):
587        if not tokens:
588            self._parser_state.error('empty filter statement')
589        if tokens[0].type != 'INCLUDE':
590            self._parser_state.error('invalid include', token=tokens[0])
591        tokens.pop(0)
592        if not tokens:
593            self._parser_state.error('empty include path')
594        include_path = tokens.pop(0)
595        if include_path.type != 'PATH':
596            self._parser_state.error(
597                'invalid include path', token=include_path)
598        if len(self._parser_states) == self._include_depth_limit:
599            self._parser_state.error('@include statement nested too deep')
600        include_filename = os.path.normpath(
601            os.path.join(
602                os.path.dirname(self._parser_state.filename),
603                include_path.value))
604        if not os.path.isfile(include_filename):
605            self._parser_state.error(
606                'Could not @include %s' % include_filename, token=include_path)
607        return self._parse_policy_file(include_filename)
608
609    def _parse_frequency_file(self, filename):
610        self._parser_states.append(ParserState(filename))
611        try:
612            frequency_mapping = collections.defaultdict(int)
613            with open(filename) as frequency_file:
614                for tokens in self._parser_state.tokenize(frequency_file):
615                    syscall_numbers = self._parse_syscall_descriptor(tokens)
616                    if not tokens:
617                        self._parser_state.error('missing colon')
618                    if tokens[0].type != 'COLON':
619                        self._parser_state.error(
620                            'invalid colon', token=tokens[0])
621                    tokens.pop(0)
622
623                    if not tokens:
624                        self._parser_state.error('missing number')
625                    number = tokens.pop(0)
626                    if number.type != 'NUMERIC_CONSTANT':
627                        self._parser_state.error(
628                            'invalid number', token=number)
629                    number_value = int(number.value, base=0)
630                    if number_value < 0:
631                        self._parser_state.error(
632                            'invalid number', token=number)
633
634                    for syscall_number in syscall_numbers:
635                        frequency_mapping[syscall_number] += number_value
636            return frequency_mapping
637        finally:
638            self._parser_states.pop()
639
640    # frequency-statement = '@frequency' , posix-path
641    #                      ;
642    def _parse_frequency_statement(self, tokens):
643        if not tokens:
644            self._parser_state.error('empty frequency statement')
645        if tokens[0].type != 'FREQUENCY':
646            self._parser_state.error('invalid frequency', token=tokens[0])
647        tokens.pop(0)
648        if not tokens:
649            self._parser_state.error('empty frequency path')
650        frequency_path = tokens.pop(0)
651        if frequency_path.type != 'PATH':
652            self._parser_state.error(
653                'invalid frequency path', token=frequency_path)
654        frequency_filename = os.path.normpath(
655            os.path.join(
656                os.path.dirname(self._parser_state.filename),
657                frequency_path.value))
658        if not os.path.isfile(frequency_filename):
659            self._parser_state.error(
660                'Could not open frequency file %s' % frequency_filename,
661                token=frequency_path)
662        return self._parse_frequency_file(frequency_filename)
663
664    # default-statement = '@default' , default-action
665    #                   ;
666    def _parse_default_statement(self, tokens):
667        if not tokens:
668            self._parser_state.error('empty default statement')
669        if tokens[0].type != 'DEFAULT':
670            self._parser_state.error('invalid default', token=tokens[0])
671        tokens.pop(0)
672        if not tokens:
673            self._parser_state.error('empty action')
674        return self._parse_default_action(tokens)
675
676    def _parse_policy_file(self, filename):
677        self._parser_states.append(ParserState(filename))
678        try:
679            statements = []
680            with open(filename) as policy_file:
681                for tokens in self._parser_state.tokenize(policy_file):
682                    if tokens[0].type == 'INCLUDE':
683                        statements.extend(
684                            self._parse_include_statement(tokens))
685                    elif tokens[0].type == 'FREQUENCY':
686                        for syscall_number, frequency in self._parse_frequency_statement(
687                                tokens).items():
688                            self._frequency_mapping[
689                                syscall_number] += frequency
690                    elif tokens[0].type == 'DEFAULT':
691                        self._default_action = self._parse_default_statement(
692                            tokens)
693                    else:
694                        statement = self.parse_filter_statement(tokens)
695                        if statement is None:
696                            # If all the syscalls in the statement are for
697                            # another arch, skip the whole statement.
698                            continue
699                        statements.append(statement)
700
701                    if tokens:
702                        self._parser_state.error(
703                            'extra tokens', token=tokens[0])
704            return statements
705        finally:
706            self._parser_states.pop()
707
708    def parse_file(self, filename):
709        """Parse a file and return the list of FilterStatements."""
710        self._frequency_mapping = collections.defaultdict(int)
711        try:
712            statements = [x for x in self._parse_policy_file(filename)]
713        except RecursionError:
714            raise ParseException(
715                'recursion limit exceeded',
716                filename,
717                line=self._parser_states[-1].line)
718
719        # Collapse statements into a single syscall-to-filter-list.
720        syscall_filter_mapping = {}
721        filter_statements = []
722        for syscalls, filters in statements:
723            for syscall in syscalls:
724                if syscall not in syscall_filter_mapping:
725                    filter_statements.append(
726                        FilterStatement(
727                            syscall, self._frequency_mapping.get(syscall, 1),
728                            []))
729                    syscall_filter_mapping[syscall] = filter_statements[-1]
730                syscall_filter_mapping[syscall].filters.extend(filters)
731        default_action = self._override_default_action or self._default_action
732        for filter_statement in filter_statements:
733            unconditional_actions_suffix = list(
734                itertools.dropwhile(lambda filt: filt.expression is not None,
735                                    filter_statement.filters))
736            if len(unconditional_actions_suffix) == 1:
737                # The last filter already has an unconditional action, no need
738                # to add another one.
739                continue
740            if len(unconditional_actions_suffix) > 1:
741                raise ParseException(
742                    ('Syscall %s (number %d) already had '
743                     'an unconditional action applied') %
744                    (filter_statement.syscall.name,
745                     filter_statement.syscall.number),
746                    filename,
747                    line=self._parser_states[-1].line)
748            assert not unconditional_actions_suffix
749            filter_statement.filters.append(
750                Filter(expression=None, action=default_action))
751        return ParsedPolicy(default_action, filter_statements)
752