1#!/usr/bin/env python
2
3"""
4Static Analyzer qualification infrastructure.
5
6The goal is to test the analyzer against different projects,
7check for failures, compare results, and measure performance.
8
9Repository Directory will contain sources of the projects as well as the
10information on how to build them and the expected output.
11Repository Directory structure:
12   - ProjectMap file
13   - Historical Performance Data
14   - Project Dir1
15     - ReferenceOutput
16   - Project Dir2
17     - ReferenceOutput
18   ..
19Note that the build tree must be inside the project dir.
20
21To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository to minimize network
     traffic).
25   - Build all projects, until error. Produce logs to report errors.
26   - Compare results.
27
28The files which should be kept around for failure investigations:
29   RepositoryCopy/Project DirI/ScanBuildResults
30   RepositoryCopy/Project DirI/run_static_analyzer.log
31
32Assumptions (TODO: shouldn't need to assume these.):
33   The script is being run from the Repository Directory.
34   The compiler for scan-build and scan-build are in the PATH.
35   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
36
For more logging, set the following environment variables:
38   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
39   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
40
The list of tested checkers is hardcoded in the CHECKERS variable.
To test additional checkers, use the SA_ADDITIONAL_CHECKERS environment
variable. It should contain a comma-separated list.
44"""
45import CmpRuns
46import SATestUtils as utils
47from ProjectMap import DownloadType, ProjectInfo
48
49import glob
50import logging
51import math
52import multiprocessing
53import os
54import plistlib
55import shutil
56import sys
57import threading
58import time
59import zipfile
60
61from queue import Queue
# mypy has problems finding InvalidFileException in the module,
# so we silence that false positive here.
64from plistlib import InvalidFileException  # type:ignore
65from subprocess import CalledProcessError, check_call
66from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
67
68
69###############################################################################
70# Helper functions.
71###############################################################################
72
class StreamToLogger:
    """
    A minimal file-like adapter that forwards written text to a logger.
    """
    def __init__(self, logger: logging.Logger,
                 log_level: int = logging.INFO):
        self.log_level = log_level
        self.logger = logger

    def write(self, message: str):
        """
        Log :param message: at the level chosen at construction time.
        """
        # Trailing whitespace is dropped so that the record does not carry
        # an extra newline.
        stripped = message.rstrip()
        self.logger.log(self.log_level, stripped)

    def flush(self):
        """
        Do nothing; there is no buffer to flush.
        """
        return None

    def fileno(self) -> int:
        """
        Return the constant descriptor number 0.
        """
        descriptor = 0
        return descriptor
88
89
# Per-thread storage for the stdout/stderr logging streams.
LOCAL = threading.local()


def init_logger(name: str):
    """
    Bind the calling thread's output streams to the logger called
    :param name:.
    """
    # TODO: use debug levels for VERBOSE messages
    named_logger = logging.getLogger(name)
    named_logger.setLevel(logging.DEBUG)
    LOCAL.stderr = StreamToLogger(named_logger, logging.ERROR)
    LOCAL.stdout = StreamToLogger(named_logger, logging.INFO)


# Streams for the thread that imports this module.
init_logger("main")
102
103
def stderr(message: str):
    """
    Write :param message: to the calling thread's error stream.
    """
    error_stream = LOCAL.stderr
    error_stream.write(message)
106
107
def stdout(message: str):
    """
    Write :param message: to the calling thread's regular output stream.
    """
    output_stream = LOCAL.stdout
    output_stream.write(message)
110
111
# Configure the root logger's record format: timestamp, level, logger name
# and the message itself.
logging.basicConfig(
    format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
114
115
116###############################################################################
117# Configuration setup.
118###############################################################################
119
120
121# Find Clang for static analysis.
if 'CC' in os.environ:
    # An explicitly set CC variable takes precedence over the PATH lookup.
    cc_candidate: Optional[str] = os.environ['CC']
else:
    # Otherwise ask utils.which for 'clang' using the PATH variable.
    cc_candidate = utils.which("clang", os.environ['PATH'])
if not cc_candidate:
    # This runs at module level, so a missing (or empty) compiler setting
    # terminates the process as soon as the module is loaded.
    stderr("Error: cannot find 'clang' in PATH")
    sys.exit(1)

# The clang binary used for the analysis.
CLANG = cc_candidate
131
# Number of jobs passed to 'make' via -j: 75% of the CPU count, rounded up.
MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))

# Names of the project specific scripts.
# The script that downloads the project.
DOWNLOAD_SCRIPT = "download_project.sh"
# The script that needs to be executed before the build can start.
CLEANUP_SCRIPT = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BUILD_SCRIPT = "run_static_analyzer.cmd"

# A comment line in a build script which disables prefixing the commands
# that follow it with scan-build.
NO_PREFIX_CMD = "#NOPREFIX"

# The name of the log directory (inside the results directory) and the name
# of the build log file stored in it.
LOG_DIR_NAME = "Logs"
BUILD_LOG_NAME = "run_static_analyzer.log"
# Limit on the number of failure logs echoed by check_build when the
# analysis fails.
NUM_OF_FAILURES_IN_SUMMARY = 10

# The scan-build result directory. Reference results live in a directory
# of the same name prefixed with REF_PREFIX.
OUTPUT_DIR_NAME = "ScanBuildResults"
REF_PREFIX = "Ref"

# The name of the directory storing the cached project source. If this
# directory does not exist, the download script will be executed.
# That script should create the "CachedSource" directory and download the
# project source into it.
CACHED_SOURCE_DIR_NAME = "CachedSource"

# The name of the directory containing the source code that will be analyzed.
# Each time a project is analyzed, a fresh copy of its CachedSource directory
# will be copied to the PatchedSource directory and then the local patches
# in PATCHFILE_NAME will be applied (if PATCHFILE_NAME exists).
PATCHED_SOURCE_DIR_NAME = "PatchedSource"

# The name of the patchfile specifying any changes that should be applied
# to the CachedSource before analyzing.
PATCHFILE_NAME = "changes_for_analyzer.patch"

# The list of checkers used during analysis.
# Currently, consists of all the non-experimental checkers, plus a few alpha
# checkers we don't want to regress on.
CHECKERS = ",".join([
    "alpha.unix.SimpleStream",
    "alpha.security.taint",
    "cplusplus.NewDeleteLeaks",
    "core",
    "cplusplus",
    "deadcode",
    "security",
    "unix",
    "osx",
    "nullability"
])

# Verbosity level; values >= 1 enable the additional "verbose" messages.
VERBOSE = 0
190
191
192###############################################################################
193# Test harness logic.
194###############################################################################
195
196
def run_cleanup_script(directory: str, build_log_file: IO):
    """
    Run the project's pre-processing (cleanup) script, if any, with the
    patched source directory as the working directory.
    """
    cleanup_script = os.path.join(directory, CLEANUP_SCRIPT)
    patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

    utils.run_script(cleanup_script, build_log_file, patched_source,
                     verbose=VERBOSE,
                     out=LOCAL.stdout, err=LOCAL.stderr)
207
208
class TestInfo(NamedTuple):
    """
    Information about a project and settings for its analysis.
    """
    # The project to build and analyze.
    project: ProjectInfo
    # Whether scan-build is invoked with --override-compiler.
    override_compiler: bool = False
    # Extra options appended (after a comma) to the analyzer config string.
    extra_analyzer_config: str = ""
    # Whether the results are stored as the new reference results instead
    # of being compared against the existing reference.
    is_reference_build: bool = False
    # Success criteria level handed to run_cmp_results.
    strictness: int = 0
218
219
# The typing package has no separate type for Queue; Queue is only generic
# in the type stubs, not at runtime. To still get a type-checked project
# queue, the parametrized alias is defined for mypy only and the plain class
# is used when the module actually runs.
#
# It is a common workaround for this situation:
# https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
if TYPE_CHECKING:
    TestQueue = Queue[TestInfo]  # this is only processed by mypy
else:
    TestQueue = Queue  # this will be executed at runtime
230
231
class RegressionTester:
    """
    A component aggregating all of the project testing.
    """
    def __init__(self, jobs: int, projects: List[ProjectInfo],
                 override_compiler: bool, extra_analyzer_config: str,
                 regenerate: bool, strictness: int):
        """
        :param jobs: number of worker threads; values <= 1 select the
               single-threaded mode.
        :param projects: the projects to test.
        :param override_compiler: forwarded to every project test.
        :param extra_analyzer_config: forwarded to every project test.
        :param regenerate: whether the runs are reference builds.
        :param strictness: success criteria level used when comparing
               results (an integer level, not a flag).
        """
        self.jobs = jobs
        self.projects = projects
        self.override_compiler = override_compiler
        self.extra_analyzer_config = extra_analyzer_config
        self.regenerate = regenerate
        self.strictness = strictness

    def test_all(self) -> bool:
        """
        Test all of the projects.
        :return: whether tests have passed.
        """
        projects_to_test: List[TestInfo] = [
            TestInfo(project,
                     self.override_compiler,
                     self.extra_analyzer_config,
                     self.regenerate, self.strictness)
            for project in self.projects]

        if self.jobs <= 1:
            return self._single_threaded_test_all(projects_to_test)
        return self._multi_threaded_test_all(projects_to_test)

    def _single_threaded_test_all(self,
                                  projects_to_test: List[TestInfo]) -> bool:
        """
        Run all projects.
        :return: whether tests have passed.
        """
        success = True
        for project_info in projects_to_test:
            tester = ProjectTester(project_info)
            # Every project is tested even after an earlier one has failed.
            success &= tester.test()
        return success

    def _multi_threaded_test_all(self,
                                 projects_to_test: List[TestInfo]) -> bool:
        """
        Run each project in a separate thread.

        This is OK despite GIL, as testing is blocked
        on launching external processes.

        :return: whether tests have passed.
        """
        tasks_queue = TestQueue()

        for project_info in projects_to_test:
            tasks_queue.put(project_info)

        results_differ = threading.Event()
        failure_flag = threading.Event()

        for _ in range(self.jobs):
            worker = TestProjectThread(tasks_queue, results_differ,
                                       failure_flag)
            worker.start()

        # Poll instead of joining the queue: this is required to handle
        # Ctrl-C gracefully and to notice a crashed worker, which never
        # marks its task as done.
        while tasks_queue.unfinished_tasks:
            time.sleep(0.1)  # Seconds.
            if failure_flag.is_set():
                stderr("Test runner crashed\n")
                sys.exit(1)
        return not results_differ.is_set()
302
303
304class ProjectTester:
305    """
306    A component aggregating testing for one project.
307    """
308    def __init__(self, test_info: TestInfo, silent: bool = False):
309        self.project = test_info.project
310        self.override_compiler = test_info.override_compiler
311        self.extra_analyzer_config = test_info.extra_analyzer_config
312        self.is_reference_build = test_info.is_reference_build
313        self.strictness = test_info.strictness
314        self.silent = silent
315
316    def test(self) -> bool:
317        """
318        Test a given project.
319        :return tests_passed: Whether tests have passed according
320        to the :param strictness: criteria.
321        """
322        if not self.project.enabled:
323            self.out(
324                f" \n\n--- Skipping disabled project {self.project.name}\n")
325            return True
326
327        self.out(f" \n\n--- Building project {self.project.name}\n")
328
329        start_time = time.time()
330
331        project_dir = self.get_project_dir()
332        self.vout(f"  Build directory: {project_dir}.\n")
333
334        # Set the build results directory.
335        output_dir = self.get_output_dir()
336
337        self.build(project_dir, output_dir)
338        check_build(output_dir)
339
340        if self.is_reference_build:
341            cleanup_reference_results(output_dir)
342            passed = True
343        else:
344            passed = run_cmp_results(project_dir, self.strictness)
345
346        self.out(f"Completed tests for project {self.project.name} "
347                 f"(time: {time.time() - start_time:.2f}).\n")
348
349        return passed
350
351    def get_project_dir(self) -> str:
352        return os.path.join(os.path.abspath(os.curdir), self.project.name)
353
354    def get_output_dir(self) -> str:
355        if self.is_reference_build:
356            dirname = REF_PREFIX + OUTPUT_DIR_NAME
357        else:
358            dirname = OUTPUT_DIR_NAME
359
360        return os.path.join(self.get_project_dir(), dirname)
361
362    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
363        build_log_path = get_build_log_path(output_dir)
364
365        self.out(f"Log file: {build_log_path}\n")
366        self.out(f"Output directory: {output_dir}\n")
367
368        remove_log_file(output_dir)
369
370        # Clean up scan build results.
371        if os.path.exists(output_dir):
372            self.vout(f"  Removing old results: {output_dir}\n")
373
374            shutil.rmtree(output_dir)
375
376        assert(not os.path.exists(output_dir))
377        os.makedirs(os.path.join(output_dir, LOG_DIR_NAME))
378
379        # Build and analyze the project.
380        with open(build_log_path, "w+") as build_log_file:
381            if self.project.mode == 1:
382                self._download_and_patch(directory, build_log_file)
383                run_cleanup_script(directory, build_log_file)
384                build_time, memory = self.scan_build(directory, output_dir,
385                                                     build_log_file)
386            else:
387                build_time, memory = self.analyze_preprocessed(directory,
388                                                               output_dir)
389
390            if self.is_reference_build:
391                run_cleanup_script(directory, build_log_file)
392                normalize_reference_results(directory, output_dir,
393                                            self.project.mode)
394
395        self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
396                 f"peak memory: {utils.memory_to_str(memory)}). "
397                 f"See the log for more details: {build_log_path}\n")
398
399        return build_time, memory
400
401    def scan_build(self, directory: str, output_dir: str,
402                   build_log_file: IO) -> Tuple[float, int]:
403        """
404        Build the project with scan-build by reading in the commands and
405        prefixing them with the scan-build options.
406        """
407        build_script_path = os.path.join(directory, BUILD_SCRIPT)
408        if not os.path.exists(build_script_path):
409            stderr(f"Error: build script is not defined: "
410                   f"{build_script_path}\n")
411            sys.exit(1)
412
413        all_checkers = CHECKERS
414        if 'SA_ADDITIONAL_CHECKERS' in os.environ:
415            all_checkers = (all_checkers + ',' +
416                            os.environ['SA_ADDITIONAL_CHECKERS'])
417
418        # Run scan-build from within the patched source directory.
419        cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
420
421        options = f"--use-analyzer '{CLANG}' "
422        options += f"-plist-html -o '{output_dir}' "
423        options += f"-enable-checker {all_checkers} "
424        options += "--keep-empty "
425        options += f"-analyzer-config '{self.generate_config()}' "
426
427        if self.override_compiler:
428            options += "--override-compiler "
429
430        extra_env: Dict[str, str] = {}
431
432        execution_time = 0.0
433        peak_memory = 0
434
435        try:
436            command_file = open(build_script_path, "r")
437            command_prefix = "scan-build " + options + " "
438
439            for command in command_file:
440                command = command.strip()
441
442                if len(command) == 0:
443                    continue
444
445                # Custom analyzer invocation specified by project.
446                # Communicate required information using environment variables
447                # instead.
448                if command == NO_PREFIX_CMD:
449                    command_prefix = ""
450                    extra_env['OUTPUT'] = output_dir
451                    extra_env['CC'] = CLANG
452                    extra_env['ANALYZER_CONFIG'] = self.generate_config()
453                    continue
454
455                if command.startswith("#"):
456                    continue
457
458                # If using 'make', auto imply a -jX argument
459                # to speed up analysis.  xcodebuild will
460                # automatically use the maximum number of cores.
461                if (command.startswith("make ") or command == "make") and \
462                        "-j" not in command:
463                    command += f" -j{MAX_JOBS}"
464
465                command_to_run = command_prefix + command
466
467                self.vout(f"  Executing: {command_to_run}\n")
468
469                time, mem = utils.check_and_measure_call(
470                    command_to_run, cwd=cwd,
471                    stderr=build_log_file,
472                    stdout=build_log_file,
473                    env=dict(os.environ, **extra_env),
474                    shell=True)
475
476                execution_time += time
477                peak_memory = max(peak_memory, mem)
478
479        except CalledProcessError:
480            stderr("Error: scan-build failed. Its output was: \n")
481            build_log_file.seek(0)
482            shutil.copyfileobj(build_log_file, LOCAL.stderr)
483            sys.exit(1)
484
485        return execution_time, peak_memory
486
487    def analyze_preprocessed(self, directory: str,
488                             output_dir: str) -> Tuple[float, int]:
489        """
490        Run analysis on a set of preprocessed files.
491        """
492        if os.path.exists(os.path.join(directory, BUILD_SCRIPT)):
493            stderr(f"Error: The preprocessed files project "
494                   f"should not contain {BUILD_SCRIPT}\n")
495            raise Exception()
496
497        prefix = CLANG + " --analyze "
498
499        prefix += "--analyzer-output plist "
500        prefix += " -Xclang -analyzer-checker=" + CHECKERS
501        prefix += " -fcxx-exceptions -fblocks "
502        prefix += " -Xclang -analyzer-config "
503        prefix += f"-Xclang {self.generate_config()} "
504
505        if self.project.mode == 2:
506            prefix += "-std=c++11 "
507
508        plist_path = os.path.join(directory, output_dir, "date")
509        fail_path = os.path.join(plist_path, "failures")
510        os.makedirs(fail_path)
511
512        execution_time = 0.0
513        peak_memory = 0
514
515        for full_file_name in glob.glob(directory + "/*"):
516            file_name = os.path.basename(full_file_name)
517            failed = False
518
519            # Only run the analyzes on supported files.
520            if utils.has_no_extension(file_name):
521                continue
522            if not utils.is_valid_single_input_file(file_name):
523                stderr(f"Error: Invalid single input file {full_file_name}.\n")
524                raise Exception()
525
526            # Build and call the analyzer command.
527            plist_basename = os.path.join(plist_path, file_name)
528            output_option = f"-o '{plist_basename}.plist' "
529            command = f"{prefix}{output_option}'{file_name}'"
530
531            log_path = os.path.join(fail_path, file_name + ".stderr.txt")
532            with open(log_path, "w+") as log_file:
533                try:
534                    self.vout(f"  Executing: {command}\n")
535
536                    time, mem = utils.check_and_measure_call(
537                        command, cwd=directory, stderr=log_file,
538                        stdout=log_file, shell=True)
539
540                    execution_time += time
541                    peak_memory = max(peak_memory, mem)
542
543                except CalledProcessError as e:
544                    stderr(f"Error: Analyzes of {full_file_name} failed. "
545                           f"See {log_file.name} for details. "
546                           f"Error code {e.returncode}.\n")
547                    failed = True
548
549                # If command did not fail, erase the log file.
550                if not failed:
551                    os.remove(log_file.name)
552
553        return execution_time, peak_memory
554
555    def generate_config(self) -> str:
556        out = "serialize-stats=true,stable-report-filename=true"
557
558        if self.extra_analyzer_config:
559            out += "," + self.extra_analyzer_config
560
561        return out
562
563    def _download_and_patch(self, directory: str, build_log_file: IO):
564        """
565        Download the project and apply the local patchfile if it exists.
566        """
567        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
568
569        # If the we don't already have the cached source, run the project's
570        # download script to download it.
571        if not os.path.exists(cached_source):
572            self._download(directory, build_log_file)
573            if not os.path.exists(cached_source):
574                stderr(f"Error: '{cached_source}' not found after download.\n")
575                exit(1)
576
577        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
578
579        # Remove potentially stale patched source.
580        if os.path.exists(patched_source):
581            shutil.rmtree(patched_source)
582
583        # Copy the cached source and apply any patches to the copy.
584        shutil.copytree(cached_source, patched_source, symlinks=True)
585        self._apply_patch(directory, build_log_file)
586
587    def _download(self, directory: str, build_log_file: IO):
588        """
589        Run the script to download the project, if it exists.
590        """
591        if self.project.source == DownloadType.GIT:
592            self._download_from_git(directory, build_log_file)
593        elif self.project.source == DownloadType.ZIP:
594            self._unpack_zip(directory, build_log_file)
595        elif self.project.source == DownloadType.SCRIPT:
596            self._run_download_script(directory, build_log_file)
597        else:
598            raise ValueError(
599                f"Unknown source type '{self.project.source}' is found "
600                f"for the '{self.project.name}' project")
601
602    def _download_from_git(self, directory: str, build_log_file: IO):
603        repo = self.project.origin
604        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
605
606        check_call(f"git clone --recursive {repo} {cached_source}",
607                   cwd=directory, stderr=build_log_file,
608                   stdout=build_log_file, shell=True)
609        check_call(f"git checkout --quiet {self.project.commit}",
610                   cwd=cached_source, stderr=build_log_file,
611                   stdout=build_log_file, shell=True)
612
613    def _unpack_zip(self, directory: str, build_log_file: IO):
614        zip_files = list(glob.glob(directory + "/*.zip"))
615
616        if len(zip_files) == 0:
617            raise ValueError(
618                f"Couldn't find any zip files to unpack for the "
619                f"'{self.project.name}' project")
620
621        if len(zip_files) > 1:
622            raise ValueError(
623                f"Couldn't decide which of the zip files ({zip_files}) "
624                f"for the '{self.project.name}' project to unpack")
625
626        with zipfile.ZipFile(zip_files[0], "r") as zip_file:
627            zip_file.extractall(os.path.join(directory,
628                                             CACHED_SOURCE_DIR_NAME))
629
630    @staticmethod
631    def _run_download_script(directory: str, build_log_file: IO):
632        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
633        utils.run_script(script_path, build_log_file, directory,
634                         out=LOCAL.stdout, err=LOCAL.stderr,
635                         verbose=VERBOSE)
636
637    def _apply_patch(self, directory: str, build_log_file: IO):
638        patchfile_path = os.path.join(directory, PATCHFILE_NAME)
639        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
640
641        if not os.path.exists(patchfile_path):
642            self.out("  No local patches.\n")
643            return
644
645        self.out("  Applying patch.\n")
646        try:
647            check_call(f"patch -p1 < '{patchfile_path}'",
648                       cwd=patched_source,
649                       stderr=build_log_file,
650                       stdout=build_log_file,
651                       shell=True)
652
653        except CalledProcessError:
654            stderr(f"Error: Patch failed. "
655                   f"See {build_log_file.name} for details.\n")
656            sys.exit(1)
657
658    def out(self, what: str):
659        if not self.silent:
660            stdout(what)
661
662    def vout(self, what: str):
663        if VERBOSE >= 1:
664            self.out(what)
665
666
class TestProjectThread(threading.Thread):
    """
    A worker thread testing projects taken from a shared queue.
    """
    def __init__(self, tasks_queue: TestQueue,
                 results_differ: threading.Event,
                 failure_flag: threading.Event):
        """
        :param tasks_queue: The queue of the projects to test.
        :param results_differ: Used to signify that results differ from
               the canonical ones.
        :param failure_flag: Used to signify a failure during the run.
        """
        self.tasks_queue = tasks_queue
        self.results_differ = results_differ
        self.failure_flag = failure_flag
        super().__init__()

        # Needed to gracefully handle interrupts with Ctrl-C
        self.daemon = True

    def run(self):
        """
        Test projects from the queue until it is empty.

        A project is marked as done only when its test completes; on any
        exception the failure flag is set and the exception is re-raised.
        """
        from queue import Empty

        while True:
            # Checking empty() and then calling a blocking get() is racy:
            # another worker can take the last task in between, leaving this
            # thread blocked forever. A non-blocking get avoids that.
            try:
                test_info = self.tasks_queue.get_nowait()
            except Empty:
                return

            try:
                init_logger(test_info.project.name)

                tester = ProjectTester(test_info)
                if not tester.test():
                    self.results_differ.set()

                self.tasks_queue.task_done()

            except BaseException:
                self.failure_flag.set()
                raise
699
700
701###############################################################################
702# Utility functions.
703###############################################################################
704
705
def check_build(output_dir: str):
    """
    Given the scan-build output directory, checks if the build failed
    (by searching for the failures directories).

    If there are no failures, empty plist files and empty folders are
    removed and the number of remaining plist files is reported. Otherwise
    up to NUM_OF_FAILURES_IN_SUMMARY failure logs are printed and the
    process exits with status 1.
    """
    # Check if there are failures.
    failures = glob.glob(output_dir + "/*/failures/*.stderr.txt")
    total_failed = len(failures)

    if total_failed == 0:
        clean_up_empty_plists(output_dir)
        clean_up_empty_folders(output_dir)

        plists = glob.glob(output_dir + "/*/*.plist")
        stdout(f"Number of bug reports "
               f"(non-empty plist files) produced: {len(plists)}\n")
        return

    stderr("Error: analysis failed.\n")
    stderr(f"Total of {total_failed} failures discovered.\n")

    truncated = total_failed > NUM_OF_FAILURES_IN_SUMMARY
    if truncated:
        stderr(f"See the first {NUM_OF_FAILURES_IN_SUMMARY} below.\n")

    # Slicing shows exactly the announced number of logs.
    shown = failures[:NUM_OF_FAILURES_IN_SUMMARY]
    for index, failed_log_path in enumerate(shown, start=1):
        stderr(f"\n-- Error #{index} -----------\n")

        with open(failed_log_path, "r") as failed_log:
            shutil.copyfileobj(failed_log, LOCAL.stdout)

    if truncated:
        stderr("See the results folder for more.")

    sys.exit(1)
745
746
def cleanup_reference_results(output_dir: str):
    """
    Delete html, css, and js files from reference results. These can
    include multiple copies of the benchmark source and so get very large.
    The build log is removed as well.
    """
    extensions = ["html", "css", "js"]

    for extension in extensions:
        # The glob results already start with output_dir; joining them with
        # it once more would produce wrong paths for a relative output_dir.
        for file_to_rm in glob.glob(f"{output_dir}/*/*.{extension}"):
            os.remove(file_to_rm)

    # Remove the log file. It leaks absolute path names.
    remove_log_file(output_dir)
761
762
def run_cmp_results(directory: str, strictness: int = 0) -> bool:
    """
    Compare the warnings produced by scan-build.
    strictness defines the success criteria for the test:
      0 - success if there are no crashes or analyzer failure.
      1 - success if there are no difference in the number of reported bugs.
      2 - success if all the bug reports are identical.

    Exits the process if the reference and the new results do not contain
    the same number of result folders.

    :return success: Whether tests pass according to the strictness
    criteria.
    """
    tests_passed = True
    start_time = time.time()

    ref_dir = os.path.join(directory, REF_PREFIX + OUTPUT_DIR_NAME)
    new_dir = os.path.join(directory, OUTPUT_DIR_NAME)

    # We have to go one level down the directory tree.
    ref_list = glob.glob(ref_dir + "/*")
    new_list = glob.glob(new_dir + "/*")

    # Log folders are also located in the results dir, so ignore them.
    # Either of them may be absent, which must not abort the comparison.
    for results, results_dir in ((ref_list, ref_dir), (new_list, new_dir)):
        log_dir = os.path.join(results_dir, LOG_DIR_NAME)
        if log_dir in results:
            results.remove(log_dir)

    if len(ref_list) != len(new_list):
        stderr(f"Mismatch in number of results folders: "
               f"{ref_list} vs {new_list}")
        sys.exit(1)

    # There might be more then one folder underneath - one per each scan-build
    # command (Ex: one for configure and one for make).
    if len(ref_list) > 1:
        # Assume that the corresponding folders have the same names.
        ref_list.sort()
        new_list.sort()

    patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

    # Iterate and find the differences.
    for ref_subdir, new_subdir in zip(ref_list, new_list):
        assert ref_subdir != new_subdir

        if VERBOSE >= 1:
            stdout(f"  Comparing Results: {ref_subdir} {new_subdir}\n")

        ref_results = CmpRuns.ResultsDirectory(ref_subdir)
        new_results = CmpRuns.ResultsDirectory(new_subdir, patched_source)

        # Scan the results; empty plist files are kept (delete_empty=False).
        num_diffs, reports_in_ref, reports_in_new = \
            CmpRuns.dump_scan_build_results_diff(ref_results, new_results,
                                                 delete_empty=False,
                                                 out=LOCAL.stdout)

        if num_diffs > 0:
            stdout(f"Warning: {num_diffs} differences in diagnostics.\n")

        if strictness >= 2 and num_diffs > 0:
            stdout("Error: Diffs found in strict mode (2).\n")
            tests_passed = False

        elif strictness >= 1 and reports_in_ref != reports_in_new:
            stdout("Error: The number of results are different "
                   " strict mode (1).\n")
            tests_passed = False

    stdout(f"Diagnostic comparison complete "
           f"(time: {time.time() - start_time:.2f}).\n")

    return tests_passed
837
838
def normalize_reference_results(directory: str, output_dir: str,
                                build_mode: int):
    """
    Make the absolute paths relative in the reference results.

    Every file below :param output_dir: whose name ends with 'plist' is
    rewritten in place: source paths starting with the project source
    prefix are made relative to it and the fields that change from run to
    run are dropped.

    :param directory: the project directory.
    :param build_mode: the project mode; for mode 1 the sources live in the
    patched source directory inside :param directory:.
    """
    path_prefix = directory
    if build_mode == 1:
        path_prefix = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

    for dir_path, _, filenames in os.walk(output_dir):
        for filename in filenames:
            if not filename.endswith('plist'):
                continue

            plist = os.path.join(dir_path, filename)

            # plistlib.readPlist/writePlist no longer exist in current
            # Python versions; load/dump work on binary file objects.
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

            # Strip the prefix together with the path separator after it.
            data['files'] = [source[len(path_prefix) + 1:]
                             if source.startswith(path_prefix) else source
                             for source in data['files']]

            # Remove transient fields which change from run to run.
            for diagnostic in data['diagnostics']:
                diagnostic.pop('HTMLDiagnostics_files', None)

            data.pop('clang_version', None)

            with open(plist, "wb") as plist_file:
                plistlib.dump(data, plist_file)
870
871
def get_build_log_path(output_dir: str) -> str:
    """
    Return the path of the build log file inside the log directory
    of :param output_dir:.
    """
    log_dir = os.path.join(output_dir, LOG_DIR_NAME)
    return os.path.join(log_dir, BUILD_LOG_NAME)
874
875
def remove_log_file(output_dir: str):
    """
    Delete the build log of :param output_dir:, if there is one.
    """
    log_path = get_build_log_path(output_dir)

    # Nothing to clean up.
    if not os.path.exists(log_path):
        return

    if VERBOSE >= 1:
        stdout(f"  Removing log file: {log_path}\n")

    os.remove(log_path)
885
886
def clean_up_empty_plists(output_dir: str):
    """
    A plist file is created for each call to the analyzer (each source file).
    We are only interested in the ones that have bug reports,
    so delete the rest.

    A plist is considered empty when its 'files' entry is empty. Files that
    cannot be parsed are reported on the error stream and kept.
    """
    # The glob results already start with output_dir; joining them with it
    # once more would produce wrong paths for a relative output_dir.
    for plist in glob.glob(output_dir + "/*/*.plist"):
        try:
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

            # Delete empty reports.
            if not data['files']:
                os.remove(plist)

        except InvalidFileException as e:
            stderr(f"Error parsing plist file {plist}: {str(e)}")
907
908
def clean_up_empty_folders(output_dir: str):
    """
    Remove empty folders from results, as git would not store them.

    Only the direct subdirectories of :param output_dir: are considered;
    the output directory itself and regular files in it are left alone.
    """
    for subdir in glob.glob(output_dir + "/*"):
        # Plain files cannot be listed and are not folders to clean up.
        if not os.path.isdir(subdir):
            continue

        if not os.listdir(subdir):
            # os.rmdir removes just this folder; os.removedirs would also
            # prune every parent directory that becomes empty.
            os.rmdir(subdir)
917
918
# This module is a library for SATest.py; running it directly only prints
# a hint and fails.
if __name__ == "__main__":
    for hint in ("SATestBuild.py should not be used on its own.",
                 "Please use 'SATest.py build' instead"):
        print(hint)
    sys.exit(1)
923