1# -*- coding: utf-8 -*-
2#                     The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
6""" This module implements the 'scan-build' command API.
7
Running the static analyzer against a build is done in multiple steps:
9
10 -- Intercept: capture the compilation command during the build,
11 -- Analyze:   run the analyzer against the captured commands,
12 -- Report:    create a cover report from the analyzer outputs.  """
13
14import sys
15import re
16import os
17import os.path
18import json
19import argparse
20import logging
21import subprocess
22import multiprocessing
23from libscanbuild import initialize_logging, tempdir, command_entry_point
24from libscanbuild.runner import run
25from libscanbuild.intercept import capture
26from libscanbuild.report import report_directory, document
27from libscanbuild.clang import get_checkers
28from libscanbuild.compilation import split_command
29
# names exported by 'from <this module> import *'
__all__ = ['analyze_build_main', 'analyze_build_wrapper']

# file names of the compiler wrapper executables; they are joined to the
# 'bin_dir' to form the CC and CXX values of the build environment
COMPILER_WRAPPER_CC = 'analyze-cc'
COMPILER_WRAPPER_CXX = 'analyze-c++'
34
35
@command_entry_point
def analyze_build_main(bin_dir, from_build_command):
    """ Entry point for 'analyze-build' and 'scan-build'.

    bin_dir             -- directory of the compiler wrapper executables,
    from_build_command  -- true when a build command shall be executed,
                           false when only an existing compilation
                           database is analyzed.

    Returns the value to be used as exit status: the number of bugs when
    '--status-bugs' was given, otherwise the build exit code (or zero when
    no build was executed). """

    parser = create_parser(from_build_command)
    args = parser.parse_args()
    validate(parser, args, from_build_command)

    # logging can be configured only when the verbosity is known
    initialize_logging(args.verbose)
    logging.debug('Parsed arguments: %s', args)

    with report_directory(args.output, args.keep_empty) as target_dir:
        # mode 1: no build, analyze the compilation database only
        if not from_build_command:
            run_analyzer(args, target_dir)
            bug_count = document(args, target_dir, True)
            if args.status_bugs:
                return bug_count
            return 0

        # mode 2: capture the compiler calls first, analyze afterwards
        if args.intercept_first:
            build_status = capture(args, bin_dir)
            # configure like commands are not analyzed at all
            if not need_analyzer(args.build):
                return build_status
            run_analyzer(args, target_dir)
            bug_count = document(args, target_dir, True)
            # the compilation database was only a temporary product
            if os.path.exists(args.cdb):
                os.unlink(args.cdb)
            if args.status_bugs:
                return bug_count
            return build_status

        # mode 3: interposition, the compiler wrappers run the analyzer
        # during the build
        environment = setup_environment(args, target_dir, bin_dir)
        logging.debug('run build in environment: %s', environment)
        build_status = subprocess.call(args.build, env=environment)
        logging.debug('build finished with exit code: %d', build_status)
        bug_count = document(args, target_dir, False)
        if args.status_bugs:
            return bug_count
        return build_status
80
81
def need_analyzer(args):
    """ Check the intent of the build command.

    When static analyzer run against project configure step, it should be
    silent and no need to run the analyzer or generate report.

    To run `scan-build` against the configure step might be necessary,
    when compiler wrappers are used. That's the moment when build setup
    check the compiler and capture the location for the build process.

    args -- the build command as a list of strings (might be empty).

    Returns True when the command is not empty and its first element does
    not look like a configure or autogen script, False otherwise. """

    # 'bool' keeps the result a real boolean for the empty command too
    return bool(args) and not re.search('configure|autogen', args[0])
93
94
def run_analyzer(args, output_dir):
    """ Runs the analyzer against the given compilation database.

    args        -- parsed command line arguments; the compilation database
                   is read from 'args.cdb', entries whose 'file' starts
                   with any of 'args.excludes' are skipped,
    output_dir  -- directory passed to the analyzer runs as 'output_dir'.

    The error output of the analyzer runs is logged on info level. """

    def exclude(filename):
        """ Return true when any excluded directory prefix the filename. """
        # literal prefix test: a directory name is not a regular expression
        # (characters like '+' or '.' are common in paths)
        return any(filename.startswith(directory)
                   for directory in args.excludes)

    consts = {
        'clang': args.clang,
        'output_dir': output_dir,
        'output_format': args.output_format,
        'output_failures': args.output_failures,
        'direct_args': analyzer_params(args),
        'force_debug': args.force_debug
    }

    logging.debug('run analyzer against compilation database')
    # the file is needed only while parsing, close it before the analysis
    with open(args.cdb, 'r') as handle:
        entries = json.load(handle)
    generator = (dict(cmd, **consts)
                 for cmd in entries if not exclude(cmd['file']))

    # when verbose output requested execute sequentially
    pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
    try:
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current['error_output']:
                    logging.info(line.rstrip())
        pool.close()
    except BaseException:
        # do not leave worker processes behind when the processing failed
        pool.terminate()
        raise
    finally:
        pool.join()
125
126
def setup_environment(args, destination, bin_dir):
    """ Create the environment of the build command, where the compiler
    wrappers are interposed and configured.

    The result is a copy of the current process environment extended with
    CC, CXX and the ANALYZE_BUILD_* variables. """

    # empty analyzer name is set when the build command is not analyzed
    analyzer = args.clang if need_analyzer(args.build) else ''
    log_level = 'DEBUG' if args.verbose > 2 else 'WARNING'
    report_failures = 'yes' if args.output_failures else ''
    force_debug = 'yes' if args.force_debug else ''

    overrides = {
        'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
        'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX),
        'ANALYZE_BUILD_CC': args.cc,
        'ANALYZE_BUILD_CXX': args.cxx,
        'ANALYZE_BUILD_CLANG': analyzer,
        'ANALYZE_BUILD_VERBOSE': log_level,
        'ANALYZE_BUILD_REPORT_DIR': destination,
        'ANALYZE_BUILD_REPORT_FORMAT': args.output_format,
        'ANALYZE_BUILD_REPORT_FAILURES': report_failures,
        'ANALYZE_BUILD_PARAMETERS': ' '.join(analyzer_params(args)),
        'ANALYZE_BUILD_FORCE_DEBUG': force_debug
    }
    return dict(os.environ, **overrides)
145
146
def analyze_build_wrapper(cplusplus):
    """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers.

    Executes the real compiler with the received command line arguments.
    When the compilation succeeded and ANALYZE_BUILD_CLANG is set (and not
    empty), the static analyzer is run against each source file of the
    compilation. Failure of the analyzer step is logged only.

    cplusplus -- true to act as the C++ wrapper (compiler is taken from
                 ANALYZE_BUILD_CXX, default 'c++'), false to act as the C
                 wrapper (ANALYZE_BUILD_CC, default 'cc').

    Returns the exit code of the real compiler. """

    # initialize wrapper logging
    logging.basicConfig(format='analyze: %(levelname)s: %(message)s',
                        level=os.getenv('ANALYZE_BUILD_VERBOSE', 'INFO'))
    # execute with real compiler
    compiler = os.getenv('ANALYZE_BUILD_CXX', 'c++') if cplusplus \
        else os.getenv('ANALYZE_BUILD_CC', 'cc')
    compilation = [compiler] + sys.argv[1:]
    logging.info('execute compiler: %s', compilation)
    result = subprocess.call(compilation)
    # exit when it fails, ...
    if result or not os.getenv('ANALYZE_BUILD_CLANG'):
        return result
    # ... and run the analyzer if all went well.
    try:
        # check is it a compilation
        compilation = split_command(sys.argv)
        if compilation is None:
            return result
        # collect the parameters from environment (a missing variable
        # becomes None, 'os.getenv' does not raise)
        parameters = {
            'clang': os.getenv('ANALYZE_BUILD_CLANG'),
            'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
            'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
            'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
            # split on any whitespace: an unset or empty variable shall give
            # an empty list and not a list with a single empty argument
            'direct_args': os.getenv('ANALYZE_BUILD_PARAMETERS',
                                     '').split(),
            'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
            'directory': os.getcwd(),
            'command': [sys.argv[0], '-c'] + compilation.flags
        }
        # call static analyzer against the compilation
        for source in compilation.files:
            parameters.update({'file': source})
            logging.debug('analyzer parameters %s', parameters)
            current = run(parameters)
            # display error message from the static analyzer
            if current is not None:
                for line in current['error_output']:
                    logging.info(line.rstrip())
    except Exception:
        # the build itself succeeded, analyzer problems must not break it
        logging.exception("run analyzer inside compiler wrapper failed.")
    return result
192
193
def analyzer_params(args):
    """ A group of command line arguments can mapped to command
    line arguments of the analyzer. This method generates those.

    args -- parsed command line arguments (see the attributes read below).

    Returns a list of strings, where every analyzer argument is preceded
    by '-Xclang'. The list is empty when nothing was requested. """

    def prefix_with(constant, pieces):
        """ From a sequence create another sequence where every second element
        is from the original sequence and the odd elements are the prefix.

        eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

        return [elem for piece in pieces for elem in [constant, piece]]

    result = []

    if args.store_model:
        result.append('-analyzer-store={0}'.format(args.store_model))
    if args.constraints_model:
        result.append('-analyzer-constraints={0}'.format(
            args.constraints_model))
    if args.internal_stats:
        result.append('-analyzer-stats')
    if args.analyze_headers:
        result.append('-analyzer-opt-analyze-headers')
    if args.stats:
        result.append('-analyzer-checker=debug.Stats')
    if args.maxloop:
        result.extend(['-analyzer-max-loop', str(args.maxloop)])
    if args.output_format:
        result.append('-analyzer-output={0}'.format(args.output_format))
    if args.analyzer_config:
        # the 'key=value' list is the argument of the '-analyzer-config'
        # flag, the value without the flag is not a valid argument
        result.extend(['-analyzer-config', args.analyzer_config])
    if args.verbose >= 4:
        result.append('-analyzer-display-progress')
    if args.plugins:
        result.extend(prefix_with('-load', args.plugins))
    if args.enable_checker:
        checkers = ','.join(args.enable_checker)
        result.extend(['-analyzer-checker', checkers])
    if args.disable_checker:
        checkers = ','.join(args.disable_checker)
        result.extend(['-analyzer-disable-checker', checkers])
    if os.getenv('UBIVIZ'):
        result.append('-analyzer-viz-egraph-ubigraph')

    return prefix_with('-Xclang', result)
239
240
def print_active_checkers(checkers):
    """ Write the names of the active checkers to stdout, one name per
    line in alphabetical order.

    checkers -- dictionary of checker name to (description, active). """

    active_names = [checker for checker, (_, enabled) in checkers.items()
                    if enabled]
    active_names.sort()
    for checker in active_names:
        print(checker)
247
248
def print_checkers(checkers):
    """ Write the list of all checkers with their description to stdout.

    checkers -- dictionary of checker name to (description, active).

    Checkers are listed in alphabetical order, the active ones are marked
    with '+'. Description of a checker with name longer than 30 characters
    is printed into a separate line. """

    print('')
    print('available checkers:')
    print('')
    for name in sorted(checkers):
        description, active = checkers[name]
        marker = ' ' if not active else '+'
        if len(name) <= 30:
            # name fits into the column, description goes to the same line
            print(' {0} {1: <30}  {2}'.format(marker, name, description))
            continue
        print(' {0} {1}'.format(marker, name))
        print(' ' * 35 + description)
    print('')
    print('NOTE: "+" indicates that an analysis is enabled by default.')
    print('')
266
267
def validate(parser, args, from_build_command):
    """ Semantic checks of the parsed arguments, which can not be done by
    the parser itself.

    Prints the checker list and exits when one of the '--help-checkers'
    flags was given. Reports a parser error when a build command is
    expected but it is empty. """

    if args.help_checkers_verbose or args.help_checkers:
        # the verbose listing wins when both flags were given
        show = print_checkers if args.help_checkers_verbose \
            else print_active_checkers
        show(get_checkers(args.clang, args.plugins))
        parser.exit()

    build_missing = from_build_command and not args.build
    if build_missing:
        parser.error('missing build command')
281
282
def create_parser(from_build_command):
    """ Command line argument parser factory method.

    from_build_command -- when true, a trailing positional 'build' argument
                          is added, which takes the rest of the command
                          line as the build command.

    Returns the configured 'argparse.ArgumentParser' object. Default values
    are shown in the help output ('ArgumentDefaultsHelpFormatter'). """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # -- general options: verbosity and the way the build is observed
    parser.add_argument(
        '--verbose', '-v',
        action='count',
        default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    parser.add_argument(
        '--override-compiler',
        action='store_true',
        help="""Always resort to the compiler wrapper even when better
                interposition methods are available.""")
    parser.add_argument(
        '--intercept-first',
        action='store_true',
        help="""Run the build commands only, build a compilation database,
                then run the static analyzer afterwards.
                Generally speaking it has better coverage on build commands.
                With '--override-compiler' it use compiler wrapper, but does
                not run the analyzer till the build is finished. """)
    parser.add_argument(
        '--cdb',
        metavar='<file>',
        default="compile_commands.json",
        help="""The JSON compilation database.""")

    # -- report related options
    # NOTE: 'tempdir()' is called here, when the parser is created
    parser.add_argument(
        '--output', '-o',
        metavar='<path>',
        default=tempdir(),
        help="""Specifies the output directory for analyzer reports.
                Subdirectory will be created if default directory is targeted.
                """)
    parser.add_argument(
        '--status-bugs',
        action='store_true',
        help="""By default, the exit status of '%(prog)s' is the same as the
                executed build command. Specifying this option causes the exit
                status of '%(prog)s' to be non zero if it found potential bugs
                and zero otherwise.""")
    parser.add_argument(
        '--html-title',
        metavar='<title>',
        help="""Specify the title used on generated HTML pages.
                If not specified, a default title will be used.""")
    parser.add_argument(
        '--analyze-headers',
        action='store_true',
        help="""Also analyze functions in #included files. By default, such
                functions are skipped unless they are called by functions
                within the main source file.""")
    # both flags store into 'output_format' ('html' when none was given),
    # only one of them is accepted at a time
    format_group = parser.add_mutually_exclusive_group()
    format_group.add_argument(
        '--plist', '-plist',
        dest='output_format',
        const='plist',
        default='html',
        action='store_const',
        help="""This option outputs the results as a set of .plist files.""")
    format_group.add_argument(
        '--plist-html', '-plist-html',
        dest='output_format',
        const='plist-html',
        default='html',
        action='store_const',
        help="""This option outputs the results as a set of .html and .plist
                files.""")
    # TODO: implement '-view '

    # -- advanced options: fine tuning of the analyzer and the compilers
    advanced = parser.add_argument_group('advanced options')
    advanced.add_argument(
        '--keep-empty',
        action='store_true',
        help="""Don't remove the build results directory even if no issues
                were reported.""")
    # 'store_false' action: 'output_failures' is true unless the flag given
    advanced.add_argument(
        '--no-failure-reports', '-no-failure-reports',
        dest='output_failures',
        action='store_false',
        help="""Do not create a 'failures' subdirectory that includes analyzer
                crash reports and preprocessed source files.""")
    advanced.add_argument(
        '--stats', '-stats',
        action='store_true',
        help="""Generates visitation statistics for the project being analyzed.
                """)
    advanced.add_argument(
        '--internal-stats',
        action='store_true',
        help="""Generate internal analyzer statistics.""")
    advanced.add_argument(
        '--maxloop', '-maxloop',
        metavar='<loop count>',
        type=int,
        help="""Specifiy the number of times a block can be visited before
                giving up. Increase for more comprehensive coverage at a cost
                of speed.""")
    advanced.add_argument(
        '--store', '-store',
        metavar='<model>',
        dest='store_model',
        choices=['region', 'basic'],
        help="""Specify the store model used by the analyzer.
                'region' specifies a field- sensitive store model.
                'basic' which is far less precise but can more quickly
                analyze code. 'basic' was the default store model for
                checker-0.221 and earlier.""")
    advanced.add_argument(
        '--constraints', '-constraints',
        metavar='<model>',
        dest='constraints_model',
        choices=['range', 'basic'],
        help="""Specify the contraint engine used by the analyzer. Specifying
                'basic' uses a simpler, less powerful constraint model used by
                checker-0.160 and earlier.""")
    advanced.add_argument(
        '--use-analyzer',
        metavar='<path>',
        dest='clang',
        default='clang',
        help="""'%(prog)s' uses the 'clang' executable relative to itself for
                static analysis. One can override this behavior with this
                option by using the 'clang' packaged with Xcode (on OS X) or
                from the PATH.""")
    advanced.add_argument(
        '--use-cc',
        metavar='<path>',
        dest='cc',
        default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a "fake
                compiler", which executes a real compiler for compilation and
                do other tasks (to run the static analyzer or just record the
                compiler invocation). Because of this interposing, '%(prog)s'
                does not know what compiler your project normally uses.
                Instead, it simply overrides the CC environment variable, and
                guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    advanced.add_argument(
        '--use-c++',
        metavar='<path>',
        dest='cxx',
        default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")
    advanced.add_argument(
        '--analyzer-config', '-analyzer-config',
        metavar='<options>',
        help="""Provide options to pass through to the analyzer's
                -analyzer-config flag. Several options are separated with
                comma: 'key1=val1,key2=val2'

                Available options:
                    stable-report-filename=true or false (default)

                Switch the page naming to:
                report-<filename>-<function/method name>-<id>.html
                instead of report-XXXXXX.html""")
    # repeatable flag: the values are collected into the 'excludes' list
    advanced.add_argument(
        '--exclude',
        metavar='<directory>',
        dest='excludes',
        action='append',
        default=[],
        help="""Do not run static analyzer against files found in this
                directory. (You can specify this option multiple times.)
                Could be usefull when project contains 3rd party libraries.
                The directory path shall be absolute path as file names in
                the compilation database.""")
    advanced.add_argument(
        '--force-analyze-debug-code',
        dest='force_debug',
        action='store_true',
        help="""Tells analyzer to enable assertions in code even if they were
                disabled during compilation, enabling more precise results.""")

    # -- checker options: plugin loading and checker selection
    plugins = parser.add_argument_group('checker options')
    plugins.add_argument(
        '--load-plugin', '-load-plugin',
        metavar='<plugin library>',
        dest='plugins',
        action='append',
        help="""Loading external checkers using the clang plugin interface.""")
    # the checker flags take comma separated lists and can be repeated
    plugins.add_argument(
        '--enable-checker', '-enable-checker',
        metavar='<checker name>',
        action=AppendCommaSeparated,
        help="""Enable specific checker.""")
    plugins.add_argument(
        '--disable-checker', '-disable-checker',
        metavar='<checker name>',
        action=AppendCommaSeparated,
        help="""Disable specific checker.""")
    plugins.add_argument(
        '--help-checkers',
        action='store_true',
        help="""A default group of checkers is run unless explicitly disabled.
                Exactly which checkers constitute the default group is a
                function of the operating system in use. These can be printed
                with this flag.""")
    plugins.add_argument(
        '--help-checkers-verbose',
        action='store_true',
        help="""Print all available checkers and mark the enabled ones.""")

    # the build command is the rest of the command line, it exists only
    # when a build is going to be executed
    if from_build_command:
        parser.add_argument(
            dest='build',
            nargs=argparse.REMAINDER,
            help="""Command to run.""")

    return parser
501
502
class AppendCommaSeparated(argparse.Action):
    """ argparse Action class, which collects the elements of comma
    separated lists (given by repeated flags) into a single list. """

    def __call__(self, __parser, namespace, values, __option_string):
        # the attribute exists with None value until the first occurrence
        # of the flag, so the default of 'getattr' alone is not enough
        collected = getattr(namespace, self.dest, None)
        if collected is None:
            collected = []
        collected.extend(values.split(','))
        setattr(namespace, self.dest, collected)
514