1#!/usr/bin/env python
2
3"""
4This script prints out a csv file of `suite,test,path/to/control.file` where
each row is a test that has failed every time it ran during the past N days,
where N is the `_DAYS_NOT_RUNNING_CUTOFF` constant defined below.
7
8You run it like this
9
10  ./always_failing_tests.py | tee output
11
12But please note that since we're using the models to do queries, you'll probably
13need to move your local shadow config out of the way before you run this script
14so that you point at prod.
15"""
16
17import time
18import hashlib
19import re
20import datetime
21import sys
22
23import common
24from autotest_lib.frontend import setup_django_readonly_environment
25
# Django and the models are only set up once the
# setup_django_readonly_environment module has been imported, so the model
# imports below must come after it.
28from autotest_lib.frontend.tko import models as tko_models
29from autotest_lib.frontend.afe import models as afe_models
30from autotest_lib.server.cros.dynamic_suite import suite
31
32
# Size, in days, of the look-back window: main() only considers tests whose
# started_time falls within this many days of today.
_DAYS_NOT_RUNNING_CUTOFF = 30
34
35
36def md5(s):
37  m = hashlib.md5()
38  m.update(s)
39  return m.hexdigest()
40
41
def main():
    """Print `suite,test,control_file` rows for tests that never passed.

    Steps:
      1. Load every TKO test started within the last
         _DAYS_NOT_RUNNING_CUTOFF days, skipping tests whose name contains
         '/', '_JOB' or 'try_new_image', or is exactly 'provision'.
      2. Keep only the tests whose name never got a GOOD or WARN status in
         that window (TEST_NA results are ignored entirely).
      3. Hash every control file found on disk, and the control file stored
         on each matching AFE job, so that a job can be mapped back to a
         control file path by content.
      4. Derive the suite name from the name of the job's parent job (or of
         the job itself when it has no parent).
      5. Print one CSV row per (suite, test) pair to stdout; the path column
         is '?' when no on-disk control file has a matching hash.

    Timing and diagnostic messages are written to stderr so that stdout
    contains nothing but the CSV rows.

    Returns:
        None, so `sys.exit(main())` exits with status 0.
    """
    cutoff_delta = datetime.timedelta(_DAYS_NOT_RUNNING_CUTOFF)
    cutoff_date = datetime.datetime.today() - cutoff_delta
    statuses = {s.status_idx: s.word for s in tko_models.Status.objects.all()}
    now = time.time()

    def report(tag, count):
        # Rough profiling output.  We're handling a lot of data, so it is
        # useful to see how long each stage takes and how much it produced.
        sys.stderr.write("%s: %d -- len=%d\n" % (tag, time.time() - now, count))

    tests = list(
        tko_models.Test.objects.select_related('job')
        .filter(started_time__gte=cutoff_date)
        .exclude(test__contains='/')
        .exclude(test__contains='_JOB')
        .exclude(test='provision')
        .exclude(test__contains='try_new_image'))
    report("DB", len(tests))

    # Map test name -> True if any run of that test passed in the window.
    ever_passed = {}
    for t in tests:
        word = statuses[t.status_id]
        if word == 'TEST_NA':
            # A test that was not applicable neither passed nor failed.
            continue
        passed = word in ('GOOD', 'WARN')
        ever_passed[t.test] = ever_passed.get(t.test, False) or passed
    report("OF", len(ever_passed))

    all_fail = [t for t in tests
                if t.test in ever_passed and not ever_passed[t.test]]
    report("AF", len(all_fail))

    # Map content hash -> control file path relative to the autotest dir.
    hash_to_file = {}
    fs_getter = suite.Suite.create_fs_getter(common.autotest_dir)
    for control_file in fs_getter.get_control_file_list():
        with open(control_file, 'rb') as f:
            digest = md5(f.read())
        relative_path = control_file.replace(common.autotest_dir, '')
        hash_to_file[digest] = relative_path.lstrip('/')
    report("HF", len(hash_to_file))

    afe_job_ids = {t.job.afe_job_id for t in all_fail}
    afe_jobs = (afe_models.Job.objects.select_related('parent_job')
                .filter(id__in=afe_job_ids))
    report("AJ", len(afe_jobs))

    job_to_hash = {job.id: md5(job.control_file) for job in afe_jobs}
    report("JH", len(job_to_hash))

    # The suite name is the suffix of the suite control file that appears in
    # the (parent) job's name, e.g. "...test_suites/control.bvt" -> "bvt".
    job_to_suite = {}
    suite_rgx = re.compile(r"test_suites/control\.(\w+)")
    for job in afe_jobs:
        suite_job = job.parent_job or job
        match = suite_rgx.search(suite_job.name)
        if not match:
            # Diagnostic only: this job's name does not identify a suite.
            sys.stderr.write("%s\n" % suite_job.name)
            continue
        job_to_suite[job.id] = match.group(1)

    # Group the failing runs by (suite name, test name); the suite is None
    # for jobs whose suite could not be determined above.
    by_name = {}
    for t in all_fail:
        suite_name = job_to_suite.get(t.job.afe_job_id)
        by_name.setdefault((suite_name, t.test), []).append(t)
    report("BN", len(by_name))

    for (suite_name, testname), failures in by_name.items():
        for failure in failures:
            # .get(): the AFE query may not have returned a row for every
            # afe_job_id, in which case there is no hash for this run.
            digest = job_to_hash.get(failure.job.afe_job_id)
            if digest in hash_to_file:
                print("%s,%s,%s" % (suite_name, testname,
                                    hash_to_file[digest]))
                break
        else:
            # None of the runs matched a control file currently on disk.
            print("%s,%s,?" % (suite_name, testname))
122
123
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
126