1# -*- coding: utf-8 -*-
2#                     The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
6""" This module is responsible to generate 'index.html' for the report.
7
The input for this step is the output directory, where the individual reports
can be found. It parses those reports and generates 'index.html'. """
10
11import re
12import os
13import os.path
14import sys
15import shutil
16import time
17import tempfile
18import itertools
19import plistlib
20import glob
21import json
22import logging
23import contextlib
24from libscanbuild import duplicate_check
25from libscanbuild.clang import get_version
26
27__all__ = ['report_directory', 'document']
28
29
@contextlib.contextmanager
def report_directory(hint, keep):
    """ Responsible for the report directory.

    Creates a new, time stamped directory inside 'hint' and yields its name.
    When the block is left, the directory stays if it has any content or
    if 'keep' was requested, otherwise the empty directory is removed.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory. """

    stamp = time.strftime('scan-build-%Y-%m-%d-%H%M%S-', time.localtime())

    parentdir = os.path.abspath(hint)
    try:
        os.makedirs(parentdir)
    except OSError:
        # the directory might exist already, or it was created by somebody
        # else in the meantime; anything else is a real problem
        if not os.path.isdir(parentdir):
            raise

    name = tempfile.mkdtemp(prefix=stamp, dir=parentdir)

    logging.info('Report directory created: %s', name)

    try:
        yield name
    finally:
        if os.listdir(name):
            msg = "Run 'scan-view %s' to examine bug reports."
            # never delete a directory which has content
            keep = True
        else:
            if keep:
                msg = "Report directory '%s' contains no report, but kept."
            else:
                msg = "Removing directory '%s' because it contains no report."
        logging.warning(msg, name)

        if not keep:
            os.rmdir(name)
62
63
def document(args, output_dir, use_cdb):
    """ Generates cover report and returns the number of bugs/crashes.

    The cover ('index.html') is only written when the output format has
    HTML reports and at least one bug or crash was found. """

    with_html = args.output_format in ('html', 'plist-html')

    logging.debug('count crashes and bugs')
    crash_count = len(list(read_crashes(output_dir)))
    bug_counter = create_counters()
    for bug in read_bugs(output_dir, with_html):
        bug_counter(bug)
    total = crash_count + bug_counter.total

    # nothing to put on the cover
    if not with_html or not total:
        return total

    logging.debug('generate index.html file')
    # common prefix for source files to have short filenames
    if use_cdb:
        prefix = commonprefix_from(args.cdb)
    else:
        prefix = os.getcwd()

    # assemble the cover from multiple fragments
    fragments = []
    try:
        if bug_counter.total:
            fragments.append(bug_summary(output_dir, bug_counter))
            fragments.append(bug_report(output_dir, prefix))
        if crash_count:
            fragments.append(crash_report(output_dir, prefix))
        assemble_cover(output_dir, prefix, args, fragments)
        # copy additional files to the report
        copy_resource_files(output_dir)
        if use_cdb:
            shutil.copy(args.cdb, output_dir)
    finally:
        # the fragments are temporary files, remove those which were created
        for fragment in fragments:
            os.remove(fragment)
    return total
97
98
def assemble_cover(output_dir, prefix, args, fragments):
    """ Put together the fragments into a final report.

    Writes 'index.html' into 'output_dir': the page header with the details
    of the run, then the content of every fragment file in the given order,
    then the closing tags.

    output_dir -- the directory where 'index.html' is created.
    prefix -- displayed as working directory, and its base name is used
              to create a title when none was given.
    args -- the parsed arguments; 'html_title' and 'clang' are read, and
            'html_title' is set when it was None.
    fragments -- file names, their content is copied without changes. """

    import getpass
    import socket
    import datetime

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + ' - analyzer results'

    # These values come from the command line and from the environment, so
    # they can have HTML meta characters in them (eg.: -DNAME="<value>").
    html_title = escape(args.html_title)
    user_name = escape(getpass.getuser())
    host_name = escape(socket.gethostname())
    current_dir = escape(prefix)
    cmd_args = escape(' '.join(sys.argv))

    with open(os.path.join(output_dir, 'index.html'), 'w') as handle:
        indent = 0
        handle.write(reindent("""
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""", indent).format(html_title=html_title))
        handle.write(comment('SUMMARYENDHEAD'))
        handle.write(reindent("""
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""", indent).format(html_title=html_title,
                                         user_name=user_name,
                                         host_name=host_name,
                                         current_dir=current_dir,
                                         cmd_args=cmd_args,
                                         clang_version=get_version(args.clang),
                                         date=datetime.datetime.today(
                                         ).strftime('%c')))
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, 'r') as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(reindent("""
        |  </body>
        |</html>""", indent))
145
146
def bug_summary(output_dir, bug_counter):
    """ Bug summary is a HTML table to give a better overview of the bugs.

    Writes the table into the 'summary.html.fragment' file inside
    'output_dir' and returns the name of that file.

    output_dir -- the directory where the fragment file is created.
    bug_counter -- the bug statistics; the 'total' and 'categories'
                   attributes are read. """

    name = os.path.join(output_dir, 'summary.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(reindent("""
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(bug_counter.total))
        for category, types in bug_counter.categories.items():
            # the counters keep the category and type names as they were
            # parsed, so make them safe here before embedding into HTML
            handle.write(reindent("""
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""", indent).format(escape(category)))
            for bug_type in types.values():
                handle.write(reindent("""
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(
                    bug_type=escape(bug_type['bug_type']),
                    bug_count=bug_type['bug_count'],
                    # the class name is already escaped when it is created
                    bug_type_class=bug_type['bug_type_class']))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('SUMMARYBUGEND'))
    return name
197
198
def bug_report(output_dir, prefix):
    """ Write the table of the analyzer reports into a fragment file.

    Returns the name of the created 'bugs.html.fragment' file. """

    make_safe = prettify_bug(prefix, output_dir)
    found = read_bugs(output_dir, True)

    margin = 4
    table_head = reindent("""
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""", margin)
    table_row = reindent("""
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""", margin)
    table_tail = reindent("""
        |  </tbody>
        |</table>""", margin)

    name = os.path.join(output_dir, 'bugs.html.fragment')
    with open(name, 'w') as handle:
        handle.write(table_head)
        handle.write(comment('REPORTBUGCOL'))
        for raw in found:
            # one table row and one meta comment for each bug
            current = make_safe(raw)
            handle.write(table_row.format(**current))
            handle.write(comment('REPORTBUG', {'id': current['report_file']}))
        handle.write(table_tail)
        handle.write(comment('REPORTBUGEND'))
    return name
244
245
def crash_report(output_dir, prefix):
    """ Write the table of the analyzer failures into a fragment file.

    Returns the name of the created 'crashes.html.fragment' file. """

    make_safe = prettify_crash(prefix, output_dir)
    found = read_crashes(output_dir)

    margin = 4
    table_head = reindent("""
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", margin)
    table_row = reindent("""
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""", margin)
    table_tail = reindent("""
        |  </tbody>
        |</table>""", margin)

    name = os.path.join(output_dir, 'crashes.html.fragment')
    with open(name, 'w') as handle:
        handle.write(table_head)
        for raw in found:
            # one table row and one meta comment for each crash
            current = make_safe(raw)
            handle.write(table_row.format(**current))
            handle.write(comment('REPORTPROBLEM', current))
        handle.write(table_tail)
        handle.write(comment('REPORTCRASHES'))
    return name
282
283
def read_crashes(output_dir):
    """ Generate the sequence of crashes from given output directory.

    Every '*.info.txt' file in the 'failures' directory is parsed, the
    parsing happens when the result is iterated. """

    pattern = os.path.join(output_dir, 'failures', '*.info.txt')
    return (parse_crash(found) for found in glob.iglob(pattern))
290
291
def read_bugs(output_dir, html):
    """ Generate a unique sequence of bugs from given output directory.

    The same module can be compiled multiple times with different compiler
    options, which can lead to duplicates. The final report (cover) should
    show such bugs only once, therefore bugs with the same line, path
    length and file are reported just the first time. """

    if html:
        parser, pattern = parse_bug_html, '*.html'
    else:
        parser, pattern = parse_bug_plist, '*.plist'

    def identity(bug):
        return '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug)

    seen_before = duplicate_check(identity)

    reports = glob.iglob(os.path.join(output_dir, pattern))
    # the parser gives a bug generator for every file, flatten those
    bugs = itertools.chain.from_iterable(
        parser(report) for report in reports)

    return (bug for bug in bugs if not seen_before(bug))
311
312
def parse_bug_plist(filename):
    """ Returns the generator of bugs from a single .plist file.

    A bug is a dictionary with the 'result', 'bug_type', 'bug_category',
    'bug_line', 'bug_path_length' and 'bug_file' keys. The value of
    'bug_path_length' is taken from the 'col' entry of the location.
    Diagnostics which refer to a file index outside of the 'files' list
    are logged and skipped. """

    if hasattr(plistlib, 'load'):
        # 'readPlist' is not available in recent Python versions
        with open(filename, 'rb') as handle:
            content = plistlib.load(handle)
    else:
        content = plistlib.readPlist(filename)

    # without a 'files' entry every diagnostic has a dangling file index
    files = content.get('files') or []
    for bug in content.get('diagnostics', []):
        location = bug['location']
        file_index = int(location['file'])
        if len(files) <= file_index:
            logging.warning('Parsing bug from "%s" failed', filename)
            continue

        yield {
            'result': filename,
            'bug_type': bug['type'],
            'bug_category': bug['category'],
            'bug_line': int(location['line']),
            'bug_path_length': int(location['col']),
            'bug_file': files[file_index]
        }
331
332
def parse_bug_html(filename):
    """ Parse out the bug information from HTML output.

    The bug attributes are read from the meta comments of the report (like
    '<!-- BUGTYPE ... -->'), the parsing stops at the 'BUGMETAEND' comment.
    It is a generator which yields a single bug dictionary, where the
    'bug_line' and 'bug_path_length' values are converted to integers. """

    patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
                re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
                re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
                re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
                re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
                re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
                re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
    endsign = re.compile(r'<!-- BUGMETAEND -->')

    bug = {
        'report_file': filename,
        'bug_function': 'n/a',  # compatibility with < clang-3.5
        'bug_category': 'Other',
        'bug_line': 0,
        'bug_path_length': 1
    }

    with open(filename) as handler:
        # read line by line, to not load the part after the end sign
        for line in handler:
            # every pattern is checked against the same, stripped line
            line = line.strip()
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line)
                if match:
                    bug.update(match.groupdict())
                    break

    # both keys are always present because of the default values above
    bug['bug_line'] = int(bug['bug_line'])
    bug['bug_path_length'] = int(bug['bug_path_length'])

    yield bug
369
370
def parse_crash(filename):
    """ Parse out the crash information from the report file.

    The first line of the file is taken as the source, the second one as
    the problem. The other values are file names, created from the given
    name without its '.info.txt' ending. """

    found = re.match(r'(.*)\.info\.txt', filename)
    stem = None
    if found:
        stem = found.group(1)

    with open(filename) as handler:
        content = handler.readlines()

    source = content[0].rstrip()
    problem = content[1].rstrip()
    return {
        'source': source,
        'problem': problem,
        'file': stem,
        'info': stem + '.info.txt',
        'stderr': stem + '.stderr.txt'
    }
385
386
def category_type_name(bug):
    """ Create a name from the category and the type of the bug.

    The final report uses the result as CSS class selector. Missing
    attributes are handled as empty text. """

    parts = []
    for key in ('bug_category', 'bug_type'):
        # lower case, underscore instead of space, and no apostrophes
        value = bug.get(key, '')
        parts.append(value.lower().replace(' ', '_').replace("'", ''))

    return escape('bt_' + '_'.join(parts))
398
399
def create_counters():
    """ Create counters for bug statistics.

    The result is a function which shall be called with every bug. It has
    two attributes: 'total' is the number of the bugs seen so far, while
    'categories' is a two level dictionary of the counters. The key on the
    first level is the bug category, on the second level the bug type.
    The entries are dictionaries with the 'bug_type', 'bug_type_class' and
    'bug_count' keys. """

    def counter(bug):
        category = bug['bug_category']
        kind = bug['bug_type']
        # the entry to use when this is the first bug of its type
        fresh = {
            'bug_type': kind,
            'bug_type_class': category_type_name(bug),
            'bug_count': 0
        }
        types = counter.categories.setdefault(category, dict())
        entry = types.setdefault(kind, fresh)
        entry['bug_count'] += 1
        counter.total += 1

    counter.total = 0
    counter.categories = dict()
    return counter
426
427
def prettify_bug(prefix, output_dir):
    """ Create a function which makes a bug safe to embed into HTML.

    prefix -- source file names are made relative to this directory.
    output_dir -- report file names are made relative to this directory.

    The returned function updates the given bug dictionary and returns
    that very same dictionary. """

    def predicate(bug):
        """ Make safe this values to embed into HTML. """

        # calculated from the category and type before those are escaped
        bug['bug_type_class'] = category_type_name(bug)

        encode_value(bug, 'bug_file', lambda x: escape(chop(prefix, x)))
        encode_value(bug, 'bug_category', escape)
        encode_value(bug, 'bug_type', escape)
        # function names can have HTML meta characters (eg.: 'operator<')
        encode_value(bug, 'bug_function', escape)
        encode_value(bug, 'report_file', lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate
441
442
def prettify_crash(prefix, output_dir):
    """ Create a function which makes a crash safe to embed into HTML.

    The source file name is made relative to 'prefix', the names of the
    generated files are made relative to 'output_dir'. """

    def relative_to(directory):
        """ Create an encoder which chops the directory and escapes. """

        def encode(path):
            return escape(chop(directory, path))

        return encode

    def predicate(crash):
        """ Update the values of the crash and return the same object. """

        encode_value(crash, 'source', relative_to(prefix))
        encode_value(crash, 'problem', escape)
        for key in ('file', 'info', 'stderr'):
            encode_value(crash, key, relative_to(output_dir))
        return crash

    return predicate
455
456
def copy_resource_files(output_dir):
    """ Copy every file of the 'resources' directory, which is next to this
    module, into the report directory. """

    here = os.path.dirname(os.path.realpath(__file__))
    resources = os.path.join(here, 'resources')
    for entry in os.listdir(resources):
        shutil.copy(os.path.join(resources, entry), output_dir)
463
464
def encode_value(container, key, encode):
    """ Replace 'container[key]' with the encoded value of it.

    Does nothing when the key is not in the container. """

    if key not in container:
        return
    container[key] = encode(container[key])
471
472
def chop(prefix, filename):
    """ Make 'filename' relative to 'prefix'.

    The file name is returned without changes when the prefix is empty. """

    if len(prefix) == 0:
        return filename
    return os.path.relpath(filename, prefix)
477
478
def escape(text):
    """ Paranoid HTML escape method. (Python version independent)

    Replaces the ampersand, both quotes and the angle brackets with the
    matching HTML entity. """

    entities = dict([
        ('&', '&amp;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
    ])

    def substitute(char):
        return entities.get(char, char)

    return ''.join(map(substitute, text))
490
491
def reindent(text, indent):
    """ Utility function to format html output and keep indentation.

    Everything up to the first '|' character is dropped from each line, and
    the rest is prefixed with 'indent' number of spaces. Lines with only
    white spaces are left out. Every line of the result is terminated with
    the line separator of the platform.

    text -- the template, where every non empty line has a '|' character.
    indent -- the number of spaces to put in front of each line. """

    padding = ' ' * indent
    lines = []
    for line in text.splitlines():
        if line.strip():
            # split at the first bar only, the content can have '|' in it
            lines.append(padding + line.split('|', 1)[1] + os.linesep)
    return ''.join(lines)
500
501
def comment(name, opts=None):
    """ Utility function to format meta information as comment.

    name -- the name of the meta information.
    opts -- optional dictionary, each entry becomes a 'key="value"'
            attribute after the name.

    Returns the HTML comment, terminated with the line separator. """

    # no mutable default value: an empty dictionary is used when not given
    attributes = ''.join(' {0}="{1}"'.format(key, value)
                         for key, value in (opts or {}).items())

    return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)
510
511
def commonprefix_from(filename):
    """ Create file prefix from the entries of a compilation database.

    The 'file' attribute of every entry is used. """

    with open(filename, 'r') as handle:
        entries = json.load(handle)
        return commonprefix(entry['file'] for entry in entries)
517
518
def commonprefix(files):
    """ Fixed version of os.path.commonprefix. Return the longest path prefix
    that is a prefix of all paths in filenames.

    files -- an iterable of path names.

    Returns an empty string when there are no files. When the common prefix
    is an existing directory, the absolute path of it is returned. Otherwise
    the directory part of the common prefix is returned. """

    paths = list(files)
    if not paths:
        return ''

    # this one works character by character, the result can end in the
    # middle of a name (eg.: '/src/b' for '/src/bc/x.c' and '/src/bd/y.c')
    result = os.path.commonprefix(paths)
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)

    def ends_on_boundary(path):
        """ True when the prefix covers only whole names of the path. """

        rest = path[len(result):]
        return not rest or rest.startswith(separators)

    cut_inside_name = not result.endswith(separators) and \
        not all(ends_on_boundary(path) for path in paths)

    # A prefix which was cut inside of a name is never the common directory,
    # not even when a directory with that name exists.
    if cut_inside_name or not os.path.isdir(result):
        return os.path.dirname(result)
    return os.path.abspath(result)
536