# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which both ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    '-c': 0,
    # preprocessor dependency-output options, ignored because they would
    # cause duplicate entries in the output (the only difference would be
    # these flags). this is an actual finding from users, who suffered
    # longer execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# Options whose presence means the command is not a compilation we record.
_NON_COMPILING_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Linker related options with the value glued to the option (eg.: `-lm`).
_GLUED_LINKER_OPTION = re.compile(r'^-(l|L|Wl,).+')

# Arguments which do not start with a dash might be file names.
_FILE_NAME_CANDIDATE = re.compile(r'^[^-].+')

# Executable names which denote a C++ compiler (eg.: `g++`, `clang++-3.8`).
_CPLUSPLUS_EXECUTABLE = re.compile(r'^(.+)(\+\+)(-.+|)$')

# The value type returned by `split_command`.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the executed command as a sequence of strings, where the
             first element is the executable.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not run the compilation pass (eg.: `-E`), or
    when no source file was found among the arguments. """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some options consume the arguments which follow them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILING_FLAGS:
            return None
        # ignore some flags together with their values. the default for
        # `next` keeps a truncated command (option without its value at
        # the very end) from leaking StopIteration to the caller.
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif _GLUED_LINKER_OPTION.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the option was the last argument.
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif _FILE_NAME_CANDIDATE.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   name (or path) of the file to classify.
    c_compiler: when False, the `.c` and `.i` extensions are mapped to
                the C++ variants of the language.

    Returns the language name as string, or None when the extension is
    not a known source file extension. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command: the executed command as a sequence of strings, where the
             first element is the executable. Might be empty.

    Returns 'c' or 'c++' when the base name of the executable matches
    one of the COMPILER_PATTERNS. None otherwise. """

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return 'c++' if _CPLUSPLUS_EXECUTABLE.match(executable) else 'c'
    return None