1#!/usr/bin/env python
2
3"""
4Static Analyzer qualification infrastructure.
5
6The goal is to test the analyzer against different projects, check for failures,
7compare results, and measure performance.
8
9Repository Directory will contain sources of the projects as well as the
10information on how to build them and the expected output.
11Repository Directory structure:
12   - ProjectMap file
13   - Historical Performance Data
14   - Project Dir1
15     - ReferenceOutput
16   - Project Dir2
17     - ReferenceOutput
18   ..
19
20To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository, to minimize network
     traffic).
23   - Build all projects, until error. Produce logs to report errors.
24   - Compare results.
25
26The files which should be kept around for failure investigations:
27   RepositoryCopy/Project DirI/ScanBuildResults
28   RepositoryCopy/Project DirI/run_static_analyzer.log
29
30Assumptions (TODO: shouldn't need to assume these.):
31   The script is being run from the Repository Directory.
32   The compiler for scan-build and scan-build are in the PATH.
33   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
34
For more logging, set the following environment variables:
36   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
37   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
38"""
39import CmpRuns
40
41import os
42import csv
43import sys
44import glob
45import math
46import shutil
47import time
48import plistlib
49from subprocess import check_call, CalledProcessError
50
51#------------------------------------------------------------------------------
52# Helper functions.
53#------------------------------------------------------------------------------
54
def detectCPUs():
    """
    Detects the number of CPUs on a system. Cribbed from pp.

    Sources are tried in this order:
      1. os.sysconf("SC_NPROCESSORS_ONLN"), when that name is known;
      2. the output of `sysctl -n hw.ncpu`, when os.sysconf exists but the
         name above is not known;
      3. the NUMBER_OF_PROCESSORS environment variable.

    Returns the first positive integer found, or 1 if no source yields one.
    """
    # Imported locally: the file-level import only brings in check_call and
    # CalledProcessError from subprocess.
    import subprocess

    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else: # OSX:
            # Query sysctl directly; if the tool is missing or prints
            # something unexpected, fall through to the remaining sources
            # instead of aborting the whole script.
            try:
                Sysctl = subprocess.Popen(['sysctl', '-n', 'hw.ncpu'],
                                          stdout=subprocess.PIPE)
                ncpus = int(Sysctl.communicate()[0])
                if ncpus > 0:
                    return ncpus
            except (OSError, ValueError):
                pass
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        if ncpus > 0:
            return ncpus
    return 1 # Default
74
def which(command, paths = None):
    """which(command, [paths]) - Look up the given command in the paths string
    (or the PATH environment variable, if unspecified).

    Returns the command itself when it already names an existing path,
    otherwise the first existing "<dir>/<command><suffix>" candidate, or None
    when nothing matches."""

    # A command that already names an existing path wins outright.
    if os.path.exists(command):
        return command

    # Work out the search string: the explicit argument, else $PATH, else
    # the platform default when both are empty.
    search = paths
    if search is None:
        search = os.environ.get('PATH', '')
    if not search:
        search = os.defpath

    # 'PATHEXT' is consulted only where ';' separates path entries; on
    # Cygwin it may exist but should not be used.
    if os.pathsep == ';':
        suffixes = os.environ.get('PATHEXT', '').split(';')
    else:
        suffixes = ['']

    # Walk the directories in order, trying every suffix in each of them.
    candidates = (os.path.join(directory, command + suffix)
                  for directory in search.split(os.pathsep)
                  for suffix in suffixes)
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate

    return None
105
# Make sure we flush the output after every print statement.
class flushfile(object):
    """
    Wrapper around a file-like object that flushes after every write.

    Besides write(), the wrapper offers flush() and forwards every other
    attribute lookup to the wrapped object, so code that treats sys.stdout
    as a regular file (flush(), isatty(), fileno(), ...) keeps working once
    the wrapper is installed.
    """
    def __init__(self, f):
        # f is the underlying file-like object; it must provide write() and
        # flush().
        self.f = f
    def write(self, x):
        self.f.write(x)
        self.f.flush()
    def flush(self):
        self.f.flush()
    def __getattr__(self, name):
        # Only reached for attributes the wrapper itself does not have.
        # Refuse to look up 'f' here, otherwise a wrapper whose __init__ has
        # not run would recurse forever.
        if name == "f":
            raise AttributeError(name)
        return getattr(self.f, name)

sys.stdout = flushfile(sys.stdout)
115
116def getProjectMapPath():
117    ProjectMapPath = os.path.join(os.path.abspath(os.curdir),
118                                  ProjectMapFile)
119    if not os.path.exists(ProjectMapPath):
120        print "Error: Cannot find the Project Map file " + ProjectMapPath +\
121                "\nRunning script for the wrong directory?"
122        sys.exit(-1)
123    return ProjectMapPath
124
def getProjectDir(ID):
    """Returns the path of project ID underneath the current directory."""
    RepositoryRoot = os.path.abspath(os.curdir)
    return os.path.join(RepositoryRoot, ID)
127
def getSBOutputDirName(IsReferenceBuild) :
    """
    Returns the name of the scan-build results directory; reference builds
    get the name prefixed with SBOutputDirReferencePrefix.
    """
    Prefix = SBOutputDirReferencePrefix if IsReferenceBuild == True else ""
    return Prefix + SBOutputDirName
133
134#------------------------------------------------------------------------------
135# Configuration setup.
136#------------------------------------------------------------------------------
137
138# Find Clang for static analysis.
139Clang = which("clang", os.environ['PATH'])
140if not Clang:
141    print "Error: cannot find 'clang' in PATH"
142    sys.exit(-1)
143
144# Number of jobs.
145Jobs = int(math.ceil(detectCPUs() * 0.75))
146
147# Project map stores info about all the "registered" projects.
148ProjectMapFile = "projectMap.csv"
149
150# Names of the project specific scripts.
151# The script that needs to be executed before the build can start.
152CleanupScript = "cleanup_run_static_analyzer.sh"
153# This is a file containing commands for scan-build.
154BuildScript = "run_static_analyzer.cmd"
155
156# The log file name.
157LogFolderName = "Logs"
158BuildLogName = "run_static_analyzer.log"
159# Summary file - contains the summary of the failures. Ex: This info can be be
160# displayed when buildbot detects a build failure.
161NumOfFailuresInSummary = 10
162FailuresSummaryFileName = "failures.txt"
163# Summary of the result diffs.
164DiffsSummaryFileName = "diffs.txt"
165
166# The scan-build result directory.
167SBOutputDirName = "ScanBuildResults"
168SBOutputDirReferencePrefix = "Ref"
169
170# The list of checkers used during analyzes.
171# Currently, consists of all the non-experimental checkers, plus a few alpha
172# checkers we don't want to regress on.
173Checkers="alpha.unix.SimpleStream,alpha.security.taint,cplusplus.NewDeleteLeaks,core,cplusplus,deadcode,security,unix,osx"
174
175Verbose = 1
176
177#------------------------------------------------------------------------------
178# Test harness logic.
179#------------------------------------------------------------------------------
180
181# Run pre-processing script if any.
182def runCleanupScript(Dir, PBuildLogFile):
183    ScriptPath = os.path.join(Dir, CleanupScript)
184    if os.path.exists(ScriptPath):
185        try:
186            if Verbose == 1:
187                print "  Executing: %s" % (ScriptPath,)
188            check_call("chmod +x %s" % ScriptPath, cwd = Dir,
189                                              stderr=PBuildLogFile,
190                                              stdout=PBuildLogFile,
191                                              shell=True)
192            check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
193                                              stdout=PBuildLogFile,
194                                              shell=True)
195        except:
196            print "Error: The pre-processing step failed. See ", \
197                  PBuildLogFile.name, " for details."
198            sys.exit(-1)
199
200# Build the project with scan-build by reading in the commands and
201# prefixing them with the scan-build options.
202def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
203    BuildScriptPath = os.path.join(Dir, BuildScript)
204    if not os.path.exists(BuildScriptPath):
205        print "Error: build script is not defined: %s" % BuildScriptPath
206        sys.exit(-1)
207    SBOptions = "--use-analyzer " + Clang + " "
208    SBOptions += "-plist-html -o " + SBOutputDir + " "
209    SBOptions += "-enable-checker " + Checkers + " "
210    SBOptions += "--keep-empty "
211    # Always use ccc-analyze to ensure that we can locate the failures
212    # directory.
213    SBOptions += "--override-compiler "
214    try:
215        SBCommandFile = open(BuildScriptPath, "r")
216        SBPrefix = "scan-build " + SBOptions + " "
217        for Command in SBCommandFile:
218            Command = Command.strip()
219            # If using 'make', auto imply a -jX argument
220            # to speed up analysis.  xcodebuild will
221            # automatically use the maximum number of cores.
222            if (Command.startswith("make ") or Command == "make") and \
223                "-j" not in Command:
224                Command += " -j%d" % Jobs
225            SBCommand = SBPrefix + Command
226            if Verbose == 1:
227                print "  Executing: %s" % (SBCommand,)
228            check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
229                                             stdout=PBuildLogFile,
230                                             shell=True)
231    except:
232        print "Error: scan-build failed. See ",PBuildLogFile.name,\
233              " for details."
234        raise
235
def hasNoExtension(FileName):
    """Returns True when os.path.splitext finds no extension on FileName."""
    Extension = os.path.splitext(FileName)[1]
    return Extension == ""
241
def isValidSingleInputFile(FileName):
    """
    Returns True when FileName has one of the extensions accepted for a
    single-file project (.i, .ii, .c, .cpp, .m) or no extension at all.
    """
    AcceptedExtensions = (".i", ".ii", ".c", ".cpp", ".m", "")
    Extension = os.path.splitext(FileName)[1]
    return Extension in AcceptedExtensions
249
250# Run analysis on a set of preprocessed files.
251def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
252    if os.path.exists(os.path.join(Dir, BuildScript)):
253        print "Error: The preprocessed files project should not contain %s" % \
254               BuildScript
255        raise Exception()
256
257    CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
258    CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "
259
260    if (Mode == 2) :
261        CmdPrefix += "-std=c++11 "
262
263    PlistPath = os.path.join(Dir, SBOutputDir, "date")
264    FailPath = os.path.join(PlistPath, "failures");
265    os.makedirs(FailPath);
266
267    for FullFileName in glob.glob(Dir + "/*"):
268        FileName = os.path.basename(FullFileName)
269        Failed = False
270
271        # Only run the analyzes on supported files.
272        if (hasNoExtension(FileName)):
273            continue
274        if (isValidSingleInputFile(FileName) == False):
275            print "Error: Invalid single input file %s." % (FullFileName,)
276            raise Exception()
277
278        # Build and call the analyzer command.
279        OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
280        Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
281        LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
282        try:
283            if Verbose == 1:
284                print "  Executing: %s" % (Command,)
285            check_call(Command, cwd = Dir, stderr=LogFile,
286                                           stdout=LogFile,
287                                           shell=True)
288        except CalledProcessError, e:
289            print "Error: Analyzes of %s failed. See %s for details." \
290                  "Error code %d." % \
291                   (FullFileName, LogFile.name, e.returncode)
292            Failed = True
293        finally:
294            LogFile.close()
295
296        # If command did not fail, erase the log file.
297        if Failed == False:
298            os.remove(LogFile.name);
299
300def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
301    TBegin = time.time()
302
303    BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
304    print "Log file: %s" % (BuildLogPath,)
305    print "Output directory: %s" %(SBOutputDir, )
306
307    # Clean up the log file.
308    if (os.path.exists(BuildLogPath)) :
309        RmCommand = "rm " + BuildLogPath
310        if Verbose == 1:
311            print "  Executing: %s" % (RmCommand,)
312        check_call(RmCommand, shell=True)
313
314    # Clean up scan build results.
315    if (os.path.exists(SBOutputDir)) :
316        RmCommand = "rm -r " + SBOutputDir
317        if Verbose == 1:
318            print "  Executing: %s" % (RmCommand,)
319            check_call(RmCommand, shell=True)
320    assert(not os.path.exists(SBOutputDir))
321    os.makedirs(os.path.join(SBOutputDir, LogFolderName))
322
323    # Open the log file.
324    PBuildLogFile = open(BuildLogPath, "wb+")
325
326    # Build and analyze the project.
327    try:
328        runCleanupScript(Dir, PBuildLogFile)
329
330        if (ProjectBuildMode == 1):
331            runScanBuild(Dir, SBOutputDir, PBuildLogFile)
332        else:
333            runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)
334
335        if IsReferenceBuild :
336            runCleanupScript(Dir, PBuildLogFile)
337
338    finally:
339        PBuildLogFile.close()
340
341    print "Build complete (time: %.2f). See the log for more details: %s" % \
342           ((time.time()-TBegin), BuildLogPath)
343
# A plist file is created for each call to the analyzer (each source file).
# We are only interested in the ones that have bug reports, so delete the rest.
def CleanUpEmptyPlists(SBOutputDir):
    """
    Deletes every "<SBOutputDir>/*/*.plist" file whose 'files' entry is empty.

    SBOutputDir - results directory; may be absolute or relative.
    """
    # Use plistlib.load where the interpreter provides it and fall back to
    # plistlib.readPlist otherwise.
    Load = getattr(plistlib, "load", None)

    # glob returns paths that already start with SBOutputDir, so they are
    # used as they are; joining SBOutputDir on again would double a
    # relative prefix.
    for P in glob.glob(SBOutputDir + "/*/*.plist"):
        if Load is None:
            Data = plistlib.readPlist(P)
        else:
            PlistFile = open(P, "rb")
            try:
                Data = Load(PlistFile)
            finally:
                PlistFile.close()

        # Delete empty reports.
        if not Data['files']:
            os.remove(P)
355
356# Given the scan-build output directory, checks if the build failed
357# (by searching for the failures directories). If there are failures, it
358# creates a summary file in the output directory.
359def checkBuild(SBOutputDir):
360    # Check if there are failures.
361    Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
362    TotalFailed = len(Failures);
363    if TotalFailed == 0:
364        CleanUpEmptyPlists(SBOutputDir)
365        Plists = glob.glob(SBOutputDir + "/*/*.plist")
366        print "Number of bug reports (non-empty plist files) produced: %d" %\
367           len(Plists)
368        return;
369
370    # Create summary file to display when the build fails.
371    SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
372    if (Verbose > 0):
373        print "  Creating the failures summary file %s" % (SummaryPath,)
374
375    SummaryLog = open(SummaryPath, "w+")
376    try:
377        SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
378        if TotalFailed > NumOfFailuresInSummary:
379            SummaryLog.write("See the first %d below.\n"
380                                                   % (NumOfFailuresInSummary,))
381        # TODO: Add a line "See the results folder for more."
382
383        FailuresCopied = NumOfFailuresInSummary
384        Idx = 0
385        for FailLogPathI in Failures:
386            if Idx >= NumOfFailuresInSummary:
387                break;
388            Idx += 1
389            SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
390            FailLogI = open(FailLogPathI, "r");
391            try:
392                shutil.copyfileobj(FailLogI, SummaryLog);
393            finally:
394                FailLogI.close()
395    finally:
396        SummaryLog.close()
397
398    print "Error: analysis failed. See ", SummaryPath
399    sys.exit(-1)
400
# Auxiliary object to discard stdout.
class Discarder(object):
    """File-like sink whose write() ignores whatever it is given."""
    def write(self, text):
        # Intentionally drop the text.
        return None
405
406# Compare the warnings produced by scan-build.
407def runCmpResults(Dir):
408    TBegin = time.time()
409
410    RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
411    NewDir = os.path.join(Dir, SBOutputDirName)
412
413    # We have to go one level down the directory tree.
414    RefList = glob.glob(RefDir + "/*")
415    NewList = glob.glob(NewDir + "/*")
416
417    # Log folders are also located in the results dir, so ignore them.
418    RefLogDir = os.path.join(RefDir, LogFolderName)
419    if RefLogDir in RefList:
420        RefList.remove(RefLogDir)
421    NewList.remove(os.path.join(NewDir, LogFolderName))
422
423    if len(RefList) == 0 or len(NewList) == 0:
424        return False
425    assert(len(RefList) == len(NewList))
426
427    # There might be more then one folder underneath - one per each scan-build
428    # command (Ex: one for configure and one for make).
429    if (len(RefList) > 1):
430        # Assume that the corresponding folders have the same names.
431        RefList.sort()
432        NewList.sort()
433
434    # Iterate and find the differences.
435    NumDiffs = 0
436    PairList = zip(RefList, NewList)
437    for P in PairList:
438        RefDir = P[0]
439        NewDir = P[1]
440
441        assert(RefDir != NewDir)
442        if Verbose == 1:
443            print "  Comparing Results: %s %s" % (RefDir, NewDir)
444
445        DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
446        Opts = CmpRuns.CmpOptions(DiffsPath)
447        # Discard everything coming out of stdout (CmpRun produces a lot of them).
448        OLD_STDOUT = sys.stdout
449        sys.stdout = Discarder()
450        # Scan the results, delete empty plist files.
451        NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
452        sys.stdout = OLD_STDOUT
453        if (NumDiffs > 0) :
454            print "Warning: %r differences in diagnostics. See %s" % \
455                  (NumDiffs, DiffsPath,)
456
457    print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin)
458    return (NumDiffs > 0)
459
460def updateSVN(Mode, ProjectsMap):
461    try:
462        ProjectsMap.seek(0)
463        for I in csv.reader(ProjectsMap):
464            ProjName = I[0]
465            Path = os.path.join(ProjName, getSBOutputDirName(True))
466
467            if Mode == "delete":
468                Command = "svn delete %s" % (Path,)
469            else:
470                Command = "svn add %s" % (Path,)
471
472            if Verbose == 1:
473                print "  Executing: %s" % (Command,)
474            check_call(Command, shell=True)
475
476        if Mode == "delete":
477            CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
478                            "reference results.\""
479        else:
480            CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
481                            "reference results.\""
482        if Verbose == 1:
483            print "  Executing: %s" % (CommitCommand,)
484        check_call(CommitCommand, shell=True)
485    except:
486        print "Error: SVN update failed."
487        sys.exit(-1)
488
489def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
490    print " \n\n--- Building project %s" % (ID,)
491
492    TBegin = time.time()
493
494    if Dir is None :
495        Dir = getProjectDir(ID)
496    if Verbose == 1:
497        print "  Build directory: %s." % (Dir,)
498
499    # Set the build results directory.
500    RelOutputDir = getSBOutputDirName(IsReferenceBuild)
501    SBOutputDir = os.path.join(Dir, RelOutputDir)
502
503    buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)
504
505    checkBuild(SBOutputDir)
506
507    if IsReferenceBuild == False:
508        runCmpResults(Dir)
509
510    print "Completed tests for project %s (time: %.2f)." % \
511          (ID, (time.time()-TBegin))
512
513def testAll(IsReferenceBuild = False, UpdateSVN = False):
514    PMapFile = open(getProjectMapPath(), "rb")
515    try:
516        # Validate the input.
517        for I in csv.reader(PMapFile):
518            if (len(I) != 2) :
519                print "Error: Rows in the ProjectMapFile should have 3 entries."
520                raise Exception()
521            if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
522                print "Error: Second entry in the ProjectMapFile should be 0" \
523                      " (single file), 1 (project), or 2(single file c++11)."
524                raise Exception()
525
526        # When we are regenerating the reference results, we might need to
527        # update svn. Remove reference results from SVN.
528        if UpdateSVN == True:
529            assert(IsReferenceBuild == True);
530            updateSVN("delete",  PMapFile);
531
532        # Test the projects.
533        PMapFile.seek(0)
534        for I in csv.reader(PMapFile):
535            testProject(I[0], int(I[1]), IsReferenceBuild)
536
537        # Add reference results to SVN.
538        if UpdateSVN == True:
539            updateSVN("add",  PMapFile);
540
541    except:
542        print "Error occurred. Premature termination."
543        raise
544    finally:
545        PMapFile.close()
546
# Script entry point.
#   (no argument)  test all projects against the reference output
#   -r             regenerate the reference output
#   -rs            regenerate the reference output and update svn
if __name__ == '__main__':
    IsReference = False
    UpdateSVN = False
    if len(sys.argv) >= 2:
        if sys.argv[1] == "-r":
            IsReference = True
        elif sys.argv[1] == "-rs":
            IsReference = True
            UpdateSVN = True
        else:
            # Adjacent literals are concatenated, so each one carries its own
            # line break.
            print >> sys.stderr, 'Usage: ', sys.argv[0],\
                             '[-r|-rs]\n' \
                             'Use -r to regenerate reference output\n' \
                             'Use -rs to regenerate reference output and update svn'
            # An unrecognized option must not start a full test run.
            sys.exit(-1)

    testAll(IsReference, UpdateSVN)
563