#!/usr/bin/env python

"""
Static Analyzer qualification infrastructure.

The goal is to test the analyzer against different projects, check for failures,
compare results, and measure performance.

Repository Directory will contain sources of the projects as well as the
information on how to build them and the expected output.
Repository Directory structure:
   - ProjectMap file
   - Historical Performance Data
   - Project Dir1
     - ReferenceOutput
   - Project Dir2
     - ReferenceOutput
   ..

To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository to min network
     traffic).
   - Build all projects, until error. Produce logs to report errors.
   - Compare results.

The files which should be kept around for failure investigations:
   RepositoryCopy/Project DirI/ScanBuildResults
   RepositoryCopy/Project DirI/run_static_analyzer.log

Assumptions (TODO: shouldn't need to assume these.):
   The script is being run from the Repository Directory.
   The compiler for scan-build and scan-build are in the PATH.
   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH

For more logging, set the env variables:
   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
"""
import CmpRuns

import os
import csv
import sys
import glob
import math
import shutil
import time
import plistlib
from subprocess import check_call, check_output, CalledProcessError

#------------------------------------------------------------------------------
# Helper functions.
#------------------------------------------------------------------------------

def detectCPUs():
    """
    Detects the number of CPUs on a system. Cribbed from pp.

    Returns a positive integer; falls back to 1 when no platform query
    yields a usable value.
    """
    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else: # OSX:
            # Ask sysctl directly; if it is missing or prints something
            # unexpected, fall through to the remaining checks instead of
            # aborting the whole run.
            try:
                ncpus = int(check_output(['sysctl', '-n', 'hw.ncpu']).strip())
                if ncpus > 0:
                    return ncpus
            except (OSError, CalledProcessError, ValueError):
                pass
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        try:
            ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        except ValueError:
            ncpus = 0
        if ncpus > 0:
            return ncpus
    return 1 # Default

def which(command, paths = None):
    """which(command, [paths]) - Look up the given command in the paths string
    (or the PATH environment variable, if unspecified).

    Returns the first existing candidate path, or None if nothing matches."""

    if paths is None:
        paths = os.environ.get('PATH','')

    # Check for absolute match first.
    if os.path.exists(command):
        return command

    # Would be nice if Python had a lib function for this.
    if not paths:
        paths = os.defpath

    # Get suffixes to search.
    # On Cygwin, 'PATHEXT' may exist but it should not be used.
    if os.pathsep == ';':
        pathext = os.environ.get('PATHEXT', '').split(';')
    else:
        pathext = ['']

    # Search the paths...
    for path in paths.split(os.pathsep):
        for ext in pathext:
            p = os.path.join(path, command + ext)
            if os.path.exists(p):
                return p

    return None

# Make sure we flush the output after every print statement.
class flushfile(object):
    """File-like wrapper that flushes the wrapped stream after every write."""
    def __init__(self, f):
        self.f = f
    def write(self, x):
        self.f.write(x)
        self.f.flush()
    def flush(self):
        self.f.flush()
    def __getattr__(self, name):
        # Anything we do not override (encoding, isatty, ...) is answered by
        # the wrapped stream so the wrapper can stand in for sys.stdout.
        return getattr(self.f, name)

sys.stdout = flushfile(sys.stdout)

def getProjectMapPath():
    """Return the absolute path of the project map file in the current
    directory; print an error and exit with -1 if it does not exist."""
    ProjectMapPath = os.path.join(os.path.abspath(os.curdir),
                                  ProjectMapFile)
    if not os.path.exists(ProjectMapPath):
        print("Error: Cannot find the Project Map file " + ProjectMapPath +
              "\nRunning script for the wrong directory?")
        sys.exit(-1)
    return ProjectMapPath

def getProjectDir(ID):
    """Return the absolute path of project ID under the current directory."""
    return os.path.join(os.path.abspath(os.curdir), ID)

def getSBOutputDirName(IsReferenceBuild) :
    """Return the results directory name; reference builds get the
    reference prefix prepended."""
    if IsReferenceBuild :
        return SBOutputDirReferencePrefix + SBOutputDirName
    return SBOutputDirName

#------------------------------------------------------------------------------
# Configuration setup.
#------------------------------------------------------------------------------

# Find Clang for static analysis.
# which() falls back to the PATH environment variable (and then to
# os.defpath) on its own, so an unset PATH does not raise here.
Clang = which("clang")
if not Clang:
    print("Error: cannot find 'clang' in PATH")
    sys.exit(-1)

# Number of jobs.
Jobs = int(math.ceil(detectCPUs() * 0.75))

# Project map stores info about all the "registered" projects.
ProjectMapFile = "projectMap.csv"

# Names of the project specific scripts.
# The script that needs to be executed before the build can start.
CleanupScript = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BuildScript = "run_static_analyzer.cmd"

# The log file name.
LogFolderName = "Logs"
BuildLogName = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
NumOfFailuresInSummary = 10
FailuresSummaryFileName = "failures.txt"
# Summary of the result diffs.
DiffsSummaryFileName = "diffs.txt"

# The scan-build result directory.
SBOutputDirName = "ScanBuildResults"
SBOutputDirReferencePrefix = "Ref"

# The list of checkers used during analysis.
# Currently, consists of all the non-experimental checkers, plus a few alpha
# checkers we don't want to regress on.
Checkers="alpha.unix.SimpleStream,alpha.security.taint,cplusplus.NewDeleteLeaks,core,cplusplus,deadcode,security,unix,osx"

Verbose = 1

#------------------------------------------------------------------------------
# Test harness logic.
#------------------------------------------------------------------------------

# Run pre-processing script if any.
def runCleanupScript(Dir, PBuildLogFile):
    """Run the project's cleanup script from Dir, if the project has one.

    Output of the script goes to PBuildLogFile. On failure an error is
    printed and the process exits with -1.
    """
    ScriptPath = os.path.join(Dir, CleanupScript)
    if not os.path.exists(ScriptPath):
        return
    try:
        if Verbose == 1:
            print(" Executing: %s" % (ScriptPath,))
        # Argument list instead of a shell string, so a path containing
        # spaces is still passed to chmod as one argument.
        check_call(["chmod", "+x", ScriptPath], cwd = Dir,
                                                stderr=PBuildLogFile,
                                                stdout=PBuildLogFile)
        check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
                                          stdout=PBuildLogFile,
                                          shell=True)
    except Exception:
        print("Error: The pre-processing step failed. See %s for details." %
              (PBuildLogFile.name,))
        sys.exit(-1)

# Build the project with scan-build by reading in the commands and
# prefixing them with the scan-build options.
def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    """Run every command of the project's build script under scan-build.

    Exits with -1 when the build script is missing; re-raises whatever a
    failing command raised after printing a pointer to the log.
    """
    BuildScriptPath = os.path.join(Dir, BuildScript)
    if not os.path.exists(BuildScriptPath):
        print("Error: build script is not defined: %s" % BuildScriptPath)
        sys.exit(-1)
    SBOptions = "--use-analyzer " + Clang + " "
    SBOptions += "-plist-html -o " + SBOutputDir + " "
    SBOptions += "-enable-checker " + Checkers + " "
    SBOptions += "--keep-empty "
    # Always use ccc-analyze to ensure that we can locate the failures
    # directory.
    SBOptions += "--override-compiler "
    SBPrefix = "scan-build " + SBOptions + " "
    try:
        # The 'with' block closes the script even when a command fails.
        with open(BuildScriptPath, "r") as SBCommandFile:
            for Command in SBCommandFile:
                Command = Command.strip()
                # A blank line carries no build command; do not invoke
                # scan-build with nothing to run.
                if not Command:
                    continue
                # If using 'make', auto imply a -jX argument
                # to speed up analysis.  xcodebuild will
                # automatically use the maximum number of cores.
                if (Command.startswith("make ") or Command == "make") and \
                   "-j" not in Command:
                    Command += " -j%d" % Jobs
                SBCommand = SBPrefix + Command
                if Verbose == 1:
                    print(" Executing: %s" % (SBCommand,))
                check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
                                                 stdout=PBuildLogFile,
                                                 shell=True)
    except:
        # Report and propagate; the caller decides how to terminate.
        print("Error: scan-build failed. See %s for details." %
              (PBuildLogFile.name,))
        raise

def hasNoExtension(FileName):
    """Return True when FileName has no extension."""
    return os.path.splitext(FileName)[1] == ""

def isValidSingleInputFile(FileName):
    """Return True when FileName has one of the accepted single-input
    extensions (or no extension at all)."""
    Ext = os.path.splitext(FileName)[1]
    return Ext in (".i", ".ii", ".c", ".cpp", ".m", "")

# Run analysis on a set of preprocessed files.
def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
    """Analyze each file in Dir individually with 'clang -cc1 -analyze'.

    Mode 2 adds -std=c++11. Plists go to <SBOutputDir>/date; the output of
    a failing analyzer run is kept under <SBOutputDir>/date/failures.
    Raises Exception when the project contains a build script or a file
    with an unsupported extension.
    """
    if os.path.exists(os.path.join(Dir, BuildScript)):
        print("Error: The preprocessed files project should not contain %s" %
              BuildScript)
        raise Exception()

    CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
    CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "

    if Mode == 2:
        CmdPrefix += "-std=c++11 "

    PlistPath = os.path.join(Dir, SBOutputDir, "date")
    FailPath = os.path.join(PlistPath, "failures")
    os.makedirs(FailPath)

    for FullFileName in glob.glob(Dir + "/*"):
        FileName = os.path.basename(FullFileName)
        Failed = False

        # Only run the analysis on supported files.
        if hasNoExtension(FileName):
            continue
        if not isValidSingleInputFile(FileName):
            print("Error: Invalid single input file %s." % (FullFileName,))
            raise Exception()

        # Build and call the analyzer command.
        OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
        Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
        LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
        try:
            if Verbose == 1:
                print(" Executing: %s" % (Command,))
            check_call(Command, cwd = Dir, stderr=LogFile,
                                           stdout=LogFile,
                                           shell=True)
        except CalledProcessError as e:
            print("Error: Analyzes of %s failed. See %s for details. "
                  "Error code %d." %
                  (FullFileName, LogFile.name, e.returncode))
            Failed = True
        finally:
            LogFile.close()

        # If command did not fail, erase the log file.
        if not Failed:
            os.remove(LogFile.name)

def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
    """Wipe previous results in SBOutputDir, then build/analyze the project.

    ProjectBuildMode 1 runs the build script under scan-build; any other
    mode analyzes the files in Dir one by one. For reference builds the
    cleanup script is run again afterwards.
    """
    TBegin = time.time()

    BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    print("Log file: %s" % (BuildLogPath,))
    print("Output directory: %s" % (SBOutputDir,))

    # Clean up the log file.
    if os.path.exists(BuildLogPath):
        if Verbose == 1:
            print(" Removing: %s" % (BuildLogPath,))
        os.remove(BuildLogPath)

    # Clean up scan build results.
    # shutil.rmtree instead of 'rm -r' through the shell: works for paths
    # containing spaces and does not depend on an external tool.
    if os.path.exists(SBOutputDir):
        if Verbose == 1:
            print(" Removing: %s" % (SBOutputDir,))
        shutil.rmtree(SBOutputDir)
    assert not os.path.exists(SBOutputDir)
    os.makedirs(os.path.join(SBOutputDir, LogFolderName))

    # Open the log file.
    PBuildLogFile = open(BuildLogPath, "wb+")

    # Build and analyze the project.
    try:
        runCleanupScript(Dir, PBuildLogFile)

        if ProjectBuildMode == 1:
            runScanBuild(Dir, SBOutputDir, PBuildLogFile)
        else:
            runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)

        if IsReferenceBuild :
            runCleanupScript(Dir, PBuildLogFile)

    finally:
        PBuildLogFile.close()

    print("Build complete (time: %.2f). See the log for more details: %s" %
          ((time.time()-TBegin), BuildLogPath))

def _loadPlist(Path):
    """Parse the plist file at Path with whichever plistlib API exists."""
    if hasattr(plistlib, "load"):
        PlistFile = open(Path, "rb")
        try:
            return plistlib.load(PlistFile)
        finally:
            PlistFile.close()
    # Older interpreters only provide readPlist.
    return plistlib.readPlist(Path)

# A plist file is created for each call to the analyzer(each source file).
# We are only interested in the ones that have bug reports, so delete the rest.
def CleanUpEmptyPlists(SBOutputDir):
    """Delete every plist under SBOutputDir/*/ whose 'files' entry is empty."""
    # glob already returns paths that include SBOutputDir, so they are used
    # as-is; joining them onto SBOutputDir again would break relative paths.
    for P in glob.glob(SBOutputDir + "/*/*.plist"):
        Data = _loadPlist(P)
        # Delete empty reports.
        if not Data['files']:
            os.remove(P)

# Given the scan-build output directory, checks if the build failed
# (by searching for the failures directories). If there are failures, it
# creates a summary file in the output directory.
def checkBuild(SBOutputDir):
    """Report the number of bug reports, or summarize failures and exit.

    Without failure logs: removes empty plists and prints the count of the
    remaining ones. With failure logs: writes up to NumOfFailuresInSummary
    of them into the summary file and exits with -1.
    """
    # Check if there are failures.
    Failures = sorted(glob.glob(SBOutputDir + "/*/failures/*.stderr.txt"))
    TotalFailed = len(Failures)
    if TotalFailed == 0:
        CleanUpEmptyPlists(SBOutputDir)
        Plists = glob.glob(SBOutputDir + "/*/*.plist")
        print("Number of bug reports (non-empty plist files) produced: %d" %
              len(Plists))
        return

    # Create summary file to display when the build fails.
    SummaryPath = os.path.join(SBOutputDir, LogFolderName,
                               FailuresSummaryFileName)
    if Verbose > 0:
        print(" Creating the failures summary file %s" % (SummaryPath,))

    SummaryLog = open(SummaryPath, "w+")
    try:
        SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
        if TotalFailed > NumOfFailuresInSummary:
            SummaryLog.write("See the first %d below.\n"
                             % (NumOfFailuresInSummary,))
        # TODO: Add a line "See the results folder for more."

        # Copy at most NumOfFailuresInSummary logs, numbered from 1.
        for Idx, FailLogPathI in enumerate(Failures[:NumOfFailuresInSummary],
                                           1):
            SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,))
            FailLogI = open(FailLogPathI, "r")
            try:
                shutil.copyfileobj(FailLogI, SummaryLog)
            finally:
                FailLogI.close()
    finally:
        SummaryLog.close()

    print("Error: analysis failed. See %s" % (SummaryPath,))
    sys.exit(-1)

# Auxiliary object to discard stdout.
class Discarder(object):
    """Write sink that drops everything it is given."""
    def write(self, text):
        pass # do nothing
    def flush(self):
        pass # nothing is buffered

# Compare the warnings produced by scan-build.
def runCmpResults(Dir):
    """Compare the new results in Dir against the reference results.

    Returns True when a difference was reported for at least one pair of
    result folders, False otherwise (including when either side has no
    result folders).
    """
    TBegin = time.time()

    RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    NewDir = os.path.join(Dir, SBOutputDirName)

    # We have to go one level down the directory tree.
    RefList = glob.glob(RefDir + "/*")
    NewList = glob.glob(NewDir + "/*")

    # Log folders are also located in the results dir, so ignore them.
    RefLogDir = os.path.join(RefDir, LogFolderName)
    if RefLogDir in RefList:
        RefList.remove(RefLogDir)
    NewLogDir = os.path.join(NewDir, LogFolderName)
    if NewLogDir in NewList:
        NewList.remove(NewLogDir)

    if len(RefList) == 0 or len(NewList) == 0:
        return False
    assert len(RefList) == len(NewList)

    # There might be more then one folder underneath - one per each scan-build
    # command (Ex: one for configure and one for make).
    # Assume that the corresponding folders have the same names.
    RefList.sort()
    NewList.sort()

    # Iterate and find the differences.
    FoundDiffs = False
    for RefResultDir, NewResultDir in zip(RefList, NewList):
        assert RefResultDir != NewResultDir
        if Verbose == 1:
            print(" Comparing Results: %s %s" % (RefResultDir, NewResultDir))

        DiffsPath = os.path.join(NewResultDir, DiffsSummaryFileName)
        Opts = CmpRuns.CmpOptions(DiffsPath)
        # Discard everything coming out of stdout (CmpRun produces a lot of
        # them). Restore stdout even if the comparison raises.
        OLD_STDOUT = sys.stdout
        sys.stdout = Discarder()
        try:
            # NOTE(review): the return value is treated as a diff count
            # below; confirm against CmpRuns.
            NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefResultDir,
                                                        NewResultDir,
                                                        Opts, False)
        finally:
            sys.stdout = OLD_STDOUT
        if NumDiffs > 0:
            # Remember a difference in ANY pair, not just the last one.
            FoundDiffs = True
            print("Warning: %r differences in diagnostics. See %s" %
                  (NumDiffs, DiffsPath,))

    print("Diagnostic comparison complete (time: %.2f)." %
          (time.time()-TBegin))
    return FoundDiffs

def updateSVN(Mode, ProjectsMap):
    """'svn delete' (Mode == "delete") or 'svn add' the reference results of
    every project listed in the open ProjectsMap file, then commit.

    Prints an error and exits with -1 if any svn command fails.
    """
    try:
        ProjectsMap.seek(0)
        for I in csv.reader(ProjectsMap):
            ProjName = I[0]
            Path = os.path.join(ProjName, getSBOutputDirName(True))

            # Argument lists keep paths with spaces intact and avoid the
            # shell altogether.
            if Mode == "delete":
                Command = ["svn", "delete", Path]
            else:
                Command = ["svn", "add", Path]

            if Verbose == 1:
                print(" Executing: %s" % (" ".join(Command),))
            check_call(Command)

        if Mode == "delete":
            Message = "[analyzer tests] Remove reference results."
        else:
            Message = "[analyzer tests] Add new reference results."
        if Verbose == 1:
            print(" Executing: svn commit -m \"%s\"" % (Message,))
        check_call(["svn", "commit", "-m", Message])
    except Exception:
        print("Error: SVN update failed.")
        sys.exit(-1)

def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
    """Build project ID, check the build, and (for non-reference builds)
    compare the results against the reference output.

    Dir defaults to the project's directory under the current directory.
    """
    print(" \n\n--- Building project %s" % (ID,))

    TBegin = time.time()

    if Dir is None :
        Dir = getProjectDir(ID)
    if Verbose == 1:
        print(" Build directory: %s." % (Dir,))

    # Set the build results directory.
    RelOutputDir = getSBOutputDirName(IsReferenceBuild)
    SBOutputDir = os.path.join(Dir, RelOutputDir)

    buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)

    checkBuild(SBOutputDir)

    if not IsReferenceBuild:
        runCmpResults(Dir)

    print("Completed tests for project %s (time: %.2f)." %
          (ID, (time.time()-TBegin)))

def _openProjectMap():
    """Open the project map in the mode the csv module expects on the
    running interpreter."""
    Path = getProjectMapPath()
    if sys.version_info[0] >= 3:
        return open(Path, "r", newline="")
    return open(Path, "rb")

def testAll(IsReferenceBuild = False, UpdateSVN = False):
    """Validate the project map, then test every project listed in it.

    With UpdateSVN set (reference builds only) the old reference results
    are removed from SVN before the run and the new ones added afterwards.
    """
    PMapFile = _openProjectMap()
    try:
        # Validate the input.
        for I in csv.reader(PMapFile):
            if len(I) != 2:
                print("Error: Rows in the ProjectMapFile should have 2 "
                      "entries.")
                raise Exception()
            if I[1] not in ("0", "1", "2"):
                print("Error: Second entry in the ProjectMapFile should be 0"
                      " (single file), 1 (project), or 2(single file c++11).")
                raise Exception()

        # When we are regenerating the reference results, we might need to
        # update svn. Remove reference results from SVN.
        if UpdateSVN:
            assert IsReferenceBuild
            updateSVN("delete", PMapFile)

        # Test the projects.
        PMapFile.seek(0)
        for I in csv.reader(PMapFile):
            testProject(I[0], int(I[1]), IsReferenceBuild)

        # Add reference results to SVN.
        if UpdateSVN:
            updateSVN("add", PMapFile)

    except:
        # Deliberately broad: sys.exit() from a failed step is announced
        # here too, and everything is re-raised unchanged.
        print("Error occurred. Premature termination.")
        raise
    finally:
        PMapFile.close()

if __name__ == '__main__':
    IsReference = False
    UpdateSVN = False
    if len(sys.argv) >= 2:
        if sys.argv[1] == "-r":
            IsReference = True
        elif sys.argv[1] == "-rs":
            IsReference = True
            UpdateSVN = True
        else:
            sys.stderr.write(
                "Usage: %s [-r|-rs]\n"
                "  Use -r to regenerate reference output\n"
                "  Use -rs to regenerate reference output and update svn\n"
                % (sys.argv[0],))
            # An unrecognized option must not start a test run.
            sys.exit(-1)

    testAll(IsReference, UpdateSVN)