1#!/usr/bin/python -u 2 3""" 4A script to help find the last few jobs that ran on a set of hosts that match 5the specified query, and rank them according to frequence across these hosts. 6Usage: 71. Get last 5 jobs from 1 day ago running on all lumpies in pool suites that are 8 currently in repair fail: 9 ./sheriff_host_utils --days_back=1 10 --query 'labels=pool:suites,board:lumpy status="Repair Failed"' 11 122. Email someone about the last 5 jobs on all Repair Failed hosts. 13 ./sheriff_host_utils --limit 5 --query 'status="Repair Failed"' 14 --email someone@something.com 15""" 16 17import argparse 18import collections 19import datetime 20import operator 21import shlex 22import sys 23 24import common 25 26from autotest_lib.client.common_lib import mail 27from autotest_lib.frontend import setup_django_environment 28from autotest_lib.frontend.afe import models 29from autotest_lib.server import frontend 30from autotest_lib.server.cros import repair_utils 31from django.utils import timezone as django_timezone 32 33 34def _parse_args(args): 35 description=('./sheriff_host_utils.py --limit 5 --days_back 5 ' 36 '--query \'status="Repair Failed" invalid=0 locked=0\'') 37 if not args: 38 print ('Too few arguments, execute %s, or try ' 39 './sheriff_host_utils.py --help' % description) 40 sys.exit(1) 41 42 parser = argparse.ArgumentParser(description=description) 43 parser.add_argument('--limit', default=5, 44 help='The number of jobs per host.Eg: --limit 5') 45 parser.add_argument('--days_back', default=5, 46 help='Number of days to search. Eg: --days_back 5') 47 default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy' 48 parser.add_argument('--query', default=default_query, 49 help='Search query.Eg: --query %s' % default_query) 50 parser.add_argument('--email', default=None, help='send results to email.') 51 return parser.parse_args(args) 52 53 54def _parse_query(query): 55 """Parses query string for a host. 56 57 All queries follow the format: 'key=value key2=value..' where all keys are 58 are columns of the host table with the exception of labels. When specifying 59 labels, the format is the same even though a label is a foreign key: 60 --query 'lable=<comma seperated list of label names>'. 61 62 @return: A dictionary into which the query has been parsed. 63 """ 64 l = shlex.split(query) 65 keys = [elem[:elem.find('=')] for elem in l] 66 values = [elem[elem.find('=')+1:] for elem in l] 67 payload = dict(zip(keys, values)) 68 return payload 69 70 71def _get_pool(host): 72 """Returns the pool of a host. 73 """ 74 labels = host.labels.all() 75 for label_name in [label.name for label in labels]: 76 if 'pool' in label_name: 77 return label_name 78 79 80def retrieve_hosts(payload): 81 """Retrieve hosts matching the payload. 82 83 @param payload: A dict with selection criteria for hosts. 84 85 @return: A queryset of hosts matching the payload. 86 """ 87 # Replace label names with a foreign key query. 88 query_hosts = models.Host.objects.all() 89 if 'labels' in payload: 90 for label in payload['labels'].split(','): 91 query_hosts = query_hosts.filter(labels__name=label) 92 del payload['labels'] 93 return query_hosts.filter(**payload) 94 95 96def analyze_jobs(hqes): 97 """Perform some aggregation on the jobs that ran on matching hosts. 98 99 @return: A string with the results of the analysis. 100 """ 101 names = [hqe.job.name for hqe in hqes] 102 ranking = collections.Counter([name[name.rfind('/')+1:] for name in names]) 103 sorted_rankings = sorted(ranking.iteritems(), key=operator.itemgetter(1)) 104 m = 'Ranking tests that ran on those hosts by frequency: \n\t' 105 for job_stat in reversed(sorted_rankings): 106 m += '%s test name: %s\n\t' % (job_stat[1], job_stat[0]) 107 return m 108 109 110def last_jobs_on_hosts(payload, limit_jobs, days_back): 111 """Find the last limit_jobs on hosts with given status within days_back. 112 113 @param payload: A dictionary specifiying the selection criteria of the hosts. 114 Eg {'stauts': "Ready", 'id': 40} 115 @param limit_jobs: The number of jobs per host. 116 @param days_back: The days back to search for jobs. 117 118 @retrurn: A string with information about the last jobs that ran on all 119 hosts matching the query mentioned in the payload. 120 """ 121 host_map = {} 122 pool_less, job_less, jobs_to_analyze = [], [], [] 123 hqes = models.HostQueueEntry.objects.all() 124 cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back) 125 message = '' 126 127 for host in retrieve_hosts(payload): 128 pool = _get_pool(host) 129 if not pool: 130 pool_less.append(host.hostname) 131 continue 132 relevent_hqes = list(hqes.filter(host_id=host.id, 133 started_on__gte=cutoff).order_by('-started_on')[:limit_jobs]) 134 if relevent_hqes: 135 jobs = ['name: %s, id: %s' % 136 (hqe.job.name, hqe.job_id) for hqe in relevent_hqes] 137 message += '%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs)) 138 jobs_to_analyze += relevent_hqes 139 else: 140 job_less.append(host.hostname) 141 142 if job_less: 143 message += ('\nNo jobs found for the following hosts within cutoff %s\n\t' % 144 cutoff) 145 message += '\n\t'.join(job_less) 146 if pool_less: 147 message += '%s%s' % ('\nNo pools found on the following hosts:', 148 '\n\t'.join(pool_less)) 149 if jobs_to_analyze: 150 message += '\n\n%s' % analyze_jobs(jobs_to_analyze) 151 152 if message: 153 return '%s\n%s' % ('Host information:', message) 154 return 'No hosts matching query %s from %s days back' % (payload, days_back) 155 156 157if __name__ == '__main__': 158 args = _parse_args(sys.argv[1:]) 159 message = last_jobs_on_hosts(_parse_query(args.query), 160 int(args.limit), int(args.days_back)) 161 if args.email: 162 mail.send('', args.email, '', 163 'Results from your sheirff script.', message) 164 print message 165