1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """
6
7import re
8import os
9import collections
10
11__all__ = ['split_command', 'classify_source', 'compiler_language']
12
# Compiler options which are dropped when a compilation database entry is
# created. The table is consulted by `split_command` (the method which
# filters and classifies the parameters). Please note, that this table does
# not list every parameter which might get ignored.
#
# Each key is an option name. The value is the number of arguments which
# follow the option and are dropped together with it.
IGNORED_FLAGS = {}

# The compile-only flag. The creator of the compilation database sets it
# explicitly, so the one from the command line is not kept.
IGNORED_FLAGS.update(dict.fromkeys(['-c'], 0))

# Preprocessor options which control dependency (make rule) output. Keeping
# them would cause duplicate entries in the output, where the only
# difference would be these flags. Users reported longer execution times
# caused by such duplicates.
IGNORED_FLAGS.update(dict.fromkeys(['-MD', '-MMD', '-MG', '-MP'], 0))
IGNORED_FLAGS.update(dict.fromkeys(['-MF', '-MT', '-MQ'], 1))

# Linker options. The compilation database contains compilation commands
# only, so the compiler would ignore these flags anyway. Dropping them
# makes the output more readable.
IGNORED_FLAGS.update(
    dict.fromkeys(['-static', '-shared', '-s', '-rdynamic'], 0))
IGNORED_FLAGS.update(
    dict.fromkeys(['-l', '-L', '-u', '-z', '-T', '-Xlinker'], 1))
49
# Executable name patterns of the known C/C++ compilers. The patterns are
# matched against the base name of the executable (no directory part).
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # plain `cc` / `c++`, optionally behind a wrapper prefix
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # names ending in `gcc`, `g++`, `mcc` or `m++`, optionally with
        # dash-separated prefixes and a version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # `clang` / `clang++`, optionally with dash-separated prefixes
        # and a version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # `llvm-gcc` / `llvm-g++`
        r'^llvm-g(cc|\+\+)$',
    ))
57
58
# Result type of `split_command`. It is created once here, instead of a new
# class being generated by every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The command is a list of strings, where the first element is the
    executable and the rest are its arguments.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not recognised as a C/C++
    compiler, when one of the arguments shows that the compilation pass is
    not involved, or when none of the arguments is a source file.

    An option which expects a value but stands as the very last argument
    is tolerated: an ignored option is dropped, while '-D' and '-I' are
    kept as a compile option without value. """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some of the options consume the arguments which follow them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with the values which belong to them
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default stops `StopIteration` to escape when the
                # command ends before the expected value
                next(args, None)
        # ignore linker related flags which are glued to their value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            try:
                flags.append(next(args))
            except StopIteration:
                # the value is missing, keep the option alone
                pass
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)
101
102
def classify_source(filename, c_compiler=True):
    """ Map the extension of a file name to a language name.

    filename:   path or plain name of the file, only the extension of
                the last path component is looked at (case sensitive).
    c_compiler: decides how the '.c' and '.i' extensions are taken, as C
                when it is true, as C++ otherwise.

    Returns the language name, or None when the extension is not known. """

    extension = os.path.splitext(os.path.basename(filename))[1]

    # only these two extensions depend on the kind of the compiler
    if extension == '.c':
        return 'c' if c_compiler else 'c++'
    if extension == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # the rest means the same language for both kind of compilers
    fixed_languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }
    return fixed_languages.get(extension)
127
128
def compiler_language(command):
    """ Decide whether the command is a compiler call, and of which language.

    The base name of the first element of the command is checked against
    the known compiler name patterns.

    Returns 'c++' when the name matches and has a '++' in it (optionally
    followed by a dash separated suffix), 'c' when the name matches
    otherwise. Returns None for an empty command or for a name which does
    not match. """

    # nothing to look at
    if not command:
        return None

    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            break
    else:
        # none of the patterns matched, it is not a compiler
        return None

    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'
141