1#!/usr/bin/env python 2 3""" 4This script prints out a csv file of `suite,test,path/to/control.file` where 5each row is a test that has failed every time that it ran for the past N days, 6where N is that one constant lower in this file. 7 8You run it like this 9 10 ./always_failing_tests.py | tee output 11 12But please note that since we're using the models to do queries, you'll probably 13need to move your local shadow config out of the way before you run this script 14so that you point at prod. 15""" 16 17import time 18import hashlib 19import re 20import datetime 21import sys 22 23import common 24from autotest_lib.frontend import setup_django_readonly_environment 25 26# Django and the models are only setup after 27# the setup_django_readonly_environment module is imported. 28from autotest_lib.frontend.tko import models as tko_models 29from autotest_lib.frontend.afe import models as afe_models 30from autotest_lib.server.cros.dynamic_suite import suite 31 32 33_DAYS_NOT_RUNNING_CUTOFF = 30 34 35 36def md5(s): 37 m = hashlib.md5() 38 m.update(s) 39 return m.hexdigest() 40 41 42def main(): 43 cutoff_delta = datetime.timedelta(_DAYS_NOT_RUNNING_CUTOFF) 44 cutoff_date = datetime.datetime.today() - cutoff_delta 45 statuses = {s.status_idx: s.word for s in tko_models.Status.objects.all()} 46 now = time.time() 47 48 tests = tko_models.Test.objects.select_related('job' 49 ).filter(started_time__gte=cutoff_date 50 ).exclude(test__contains='/' 51 ).exclude(test__contains='_JOB' 52 ).exclude(test='provision' 53 ).exclude(test__contains='try_new_image') 54 tests = list(tests) 55 # These prints are vague profiling work. We're handling a lot of data, so I 56 # had to dump some decent work into making sure things chug along at a 57 # decent speed. 58 print "DB: %d -- len=%d" % (time.time()-now, len(tests)) 59 60 def only_failures(d, t): 61 word = statuses[t.status_id] 62 if word == 'TEST_NA': 63 return d 64 if word == 'GOOD' or word == 'WARN': 65 passed = True 66 else: 67 passed = False 68 d[t.test] = d.get(t.test, False) or passed 69 return d 70 dct = reduce(only_failures, tests, {}) 71 print "OF: %d -- len=%d" % (time.time()-now, len(dct)) 72 73 all_fail = filter(lambda x: x.test in dct and not dct[x.test], tests) 74 print "AF: %d -- len=%d" % (time.time()-now, len(all_fail)) 75 76 hash_to_file = {} 77 fs_getter = suite.Suite.create_fs_getter(common.autotest_dir) 78 for control_file in fs_getter.get_control_file_list(): 79 with open(control_file, 'rb') as f: 80 h = md5(f.read()) 81 hash_to_file[h] = control_file.replace(common.autotest_dir, '')\ 82 .lstrip('/') 83 print "HF: %d -- len=%d" % (time.time()-now, len(hash_to_file)) 84 85 afe_job_ids = set(map(lambda t: t.job.afe_job_id, all_fail)) 86 afe_jobs = afe_models.Job.objects.select_related('parent_job')\ 87 .filter(id__in=afe_job_ids) 88 print "AJ: %d -- len=%d" % (time.time()-now, len(afe_jobs)) 89 90 job_to_hash = {} 91 for job in afe_jobs: 92 job_to_hash[job.id] = md5(job.control_file) 93 print "JH: %d -- len=%d" % (time.time()-now, len(job_to_hash)) 94 95 job_to_suite = {} 96 rgx = re.compile("test_suites/control.(\w+)") 97 for job in afe_jobs: 98 job_id = job.parent_job 99 if not job_id: 100 job_id = job 101 x = rgx.search(job_id.name) 102 if not x: 103 print job_id.name 104 continue 105 job_to_suite[job.id] = x.groups(1)[0] 106 107 def collect_by_suite_name(d, t): 108 s = job_to_suite.get(t.job.afe_job_id, None) 109 d.setdefault((s, t.test), []).append(t) 110 return d 111 by_name = reduce(collect_by_suite_name, all_fail, {}) 112 print "BN: %d -- len=%d" % (time.time()-now, len(by_name)) 113 114 for (s, testname), tests in by_name.iteritems(): 115 for test in tests: 116 h = job_to_hash[test.job.afe_job_id] 117 if h in hash_to_file: 118 print "%s,%s,%s" % (s, testname, hash_to_file[h]) 119 break 120 else: 121 print "%s,%s,?" % (s, testname) 122 123 124if __name__ == '__main__': 125 sys.exit(main()) 126