1#!/usr/bin/env python
2# Copyright 2017 gRPC authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Measure the time between PR creation and completion of all tests.
16
17You'll need a github API token to avoid being rate-limited. See
18https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/
19
20This script goes over the most recent 100 pull requests. For PRs with a single
21commit, it uses the PR's creation as the initial time; otherwise, it uses the
22date of the last commit. This is somewhat fragile, and imposed by the fact that
23GitHub reports a PR's updated timestamp for any event that modifies the PR (e.g.
24comments), not just the addition of new commits.
25
26In addition, it ignores latencies greater than five hours, as that's likely due
27to a manual re-run of tests.
28"""
29
30from __future__ import absolute_import
31from __future__ import division
32from __future__ import print_function
33
34import json
35import logging
36import pprint
37import urllib2
38
39from datetime import datetime, timedelta
40
# Prefix every log record emitted through the root logger with a timestamp.
logging.basicConfig(format='%(asctime)s %(message)s')

# GitHub API URL listing the open pull requests of grpc/grpc, 100 per page.
PRS = 'https://api.github.com/repos/grpc/grpc/pulls?state=open&per_page=100'
# GitHub API URL template for the commits of a single pull request; the
# ``{pr_number}`` placeholder is filled in with str.format().
COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits'
45
46
def gh(url):
    """Fetch ``url`` and return the raw response body.

    When the module-level ``TOKEN`` global holds a non-empty value it is sent
    as an ``Authorization: token ...`` header.

    Args:
        url: Absolute URL to request.

    Returns:
        Whatever ``response.read()`` returns for the opened URL.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``read()`` propagates
        to the caller unchanged.
    """
    request = urllib2.Request(url)
    # TOKEN is only assigned by main(); look it up defensively so calling
    # gh() without running main() first does not fail with a NameError.
    token = globals().get('TOKEN')
    if token:
        request.add_header('Authorization', 'token {}'.format(token))
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
53
54
def print_csv_header():
    """Write the CSV column-name row to standard output."""
    header_row = (
        'pr,base_time,test_time,latency_seconds,successes,failures,errors')
    print(header_row)
57
58
def output(pr,
           base_time,
           test_time,
           diff_time,
           successes,
           failures,
           errors,
           mode='human'):
    """Print the latency measurement of one pull request.

    Args:
        pr: Pull request number.
        base_time: datetime used as the start of the measurement.
        test_time: datetime at which the tests completed.
        diff_time: Latency shown in 'human' mode. The 'csv' mode does not use
            it and recomputes the latency from test_time - base_time.
        successes: Number of successful statuses.
        failures: Number of failed statuses.
        errors: Number of errored statuses.
        mode: 'human' for a two-line readable report, 'csv' for one
            comma-separated row. Any other value prints nothing.
    """
    if mode == 'csv':
        latency_seconds = int((test_time - base_time).total_seconds())
        columns = (pr, base_time, test_time, latency_seconds, successes,
                   failures, errors)
        print(','.join(str(column) for column in columns))
        return

    if mode != 'human':
        # Unrecognised modes are silently ignored.
        return

    template = (
        "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
        "\n\tSuccesses: {}, Failures: {}, Errors: {}")
    report = template.format(pr, base_time, test_time, diff_time, successes,
                             failures, errors)
    print(report)
83
84
def parse_timestamp(datetime_str):
    """Convert a ``YYYY-MM-DDTHH:MM:SSZ`` string into a naive datetime.

    Args:
        datetime_str: Timestamp text in the format above.

    Returns:
        The corresponding ``datetime`` (without tzinfo).

    Raises:
        ValueError: if the text does not match the expected format.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    parsed = datetime.strptime(datetime_str, timestamp_format)
    return parsed
87
88
def to_posix_timestamp(dt):
    """Return the seconds between 1970-01-01 00:00:00 and ``dt`` as a string.

    Args:
        dt: A naive datetime.

    Returns:
        ``str()`` of the elapsed seconds as a float, e.g. ``'1.0'``.
    """
    epoch = datetime(1970, 1, 1)
    elapsed = dt - epoch
    seconds = elapsed.total_seconds()
    return str(seconds)
91
92
def get_pr_data():
    """Download the pull request list at PRS and keep the fields of interest.

    Returns:
        A list with one dict per pull request, holding its 'number', its
        'created_at' and 'updated_at' times parsed into datetimes, and its
        'statuses_url'.
    """
    summaries = []
    for pull in json.loads(gh(PRS)):
        summary = {}
        summary['number'] = pull['number']
        summary['created_at'] = parse_timestamp(pull['created_at'])
        summary['updated_at'] = parse_timestamp(pull['updated_at'])
        summary['statuses_url'] = pull['statuses_url']
        summaries.append(summary)
    return summaries
102
103
def get_commits_data(pr_number):
    """Count a pull request's commits and find the date of the last one.

    The commits endpoint is paginated, so every page is requested (100
    commits per page) instead of looking only at the first page. Otherwise
    long pull requests would report a truncated count and a stale "most
    recent" commit.

    Args:
        pr_number: Number of the pull request to inspect.

    Returns:
        A dict with:
          'num_commits': total number of commits retrieved.
          'most_recent_date': author date of the last listed commit as a
              datetime, or None when the pull request has no commits.
    """
    page_size = 100
    # Hard stop so a misbehaving endpoint can never make this loop forever.
    max_pages = 10
    base_url = COMMITS.format(pr_number=pr_number)

    commits = []
    for page in range(1, max_pages + 1):
        page_url = '{}?per_page={}&page={}'.format(base_url, page_size, page)
        batch = json.loads(gh(page_url))
        commits.extend(batch)
        if len(batch) < page_size:
            # A short (or empty) page means there is nothing left to fetch.
            break

    most_recent_date = None
    if commits:
        # NOTE(review): like the original code, this relies on the endpoint
        # listing commits oldest-first - confirm against the GitHub API docs.
        most_recent_date = parse_timestamp(
            commits[-1]['commit']['author']['date'])
    return {
        'num_commits': len(commits),
        'most_recent_date': most_recent_date,
    }
112
113
def get_status_data(statuses_url, system, finished_threshold=15):
    """Summarise the finished CI statuses of one CI system for a commit.

    Args:
        statuses_url: The pull request's 'statuses_url'. Every 'statuses' in
            it is replaced by 'status' before the request is made.
        system: CI system to consider, either 'kokoro' or 'jenkins'.
        finished_threshold: A summary is only returned when strictly more
            than this many matching statuses are in the success, failure or
            error state. Defaults to 15.

    Returns:
        None when the response is empty, when any matching status is still
        pending, or when too few matching statuses have finished. Otherwise
        a dict with 'latest_datetime' (the newest 'updated_at' among the
        matching statuses) and the 'successes', 'failures' and 'errors'
        counts.

    Raises:
        ValueError: if ``system`` is not one of the supported CI systems.
    """
    # Substring that identifies each CI system in a status' target_url.
    url_markers = {'kokoro': 'kokoro', 'jenkins': 'grpc-testing'}
    # Validate before doing any network I/O so bad input fails fast instead
    # of surfacing later as an unbound-variable error.
    if system not in url_markers:
        raise ValueError('Unsupported CI system {!r}; expected one of {}'.format(
            system, sorted(url_markers)))
    marker = url_markers[system]

    status_url = statuses_url.replace('statuses', 'status')
    combined = json.loads(gh(status_url + '?per_page=100'))
    if not combined:
        return None

    counts = {'success': 0, 'failure': 0, 'error': 0}
    latest_datetime = None
    for status in combined['statuses']:
        target_url = status['target_url']
        if not target_url or marker not in target_url:
            continue  # Status does not belong to the requested CI system.
        state = status['state']
        if state == 'pending':
            # Tests are still running: there is no final latency yet.
            return None
        if state in counts:
            counts[state] += 1
        updated_at = parse_timestamp(status['updated_at'])
        if latest_datetime is None or updated_at > latest_datetime:
            latest_datetime = updated_at

    if sum(counts.values()) <= finished_threshold:
        return None
    return {
        'latest_datetime': latest_datetime,
        'successes': counts['success'],
        'failures': counts['failure'],
        'errors': counts['error'],
    }
148
149
def build_args_parser():
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``--format`` (human or csv,
        default human), the mandatory ``--system`` (jenkins or kokoro) and
        ``--token`` (default empty string).
    """
    from argparse import ArgumentParser

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        ('--format',
         dict(type=str,
              choices=['human', 'csv'],
              default='human',
              help='Output format: are you a human or a machine?')),
        ('--system',
         dict(type=str,
              choices=['jenkins', 'kokoro'],
              required=True,
              help='Consider only the given CI system')),
        ('--token',
         dict(type=str,
              default='',
              help='GitHub token to use its API with a higher rate limit')),
    )

    cli = ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli
169
170
def main():
    """Report the test latency of every pull request returned by get_pr_data.

    Parses the command line, stores the GitHub token in the module-level
    ``TOKEN`` global read by gh(), and prints one record per pull request
    whose status data is available and whose latency is below five hours.
    """
    global TOKEN
    args = build_args_parser().parse_args()
    TOKEN = args.token
    if args.format == 'csv':
        print_csv_header()

    # Longer latencies are discarded (see the module docstring).
    max_latency = timedelta(hours=5)
    for pr_data in get_pr_data():
        commit_data = get_commits_data(pr_data['number'])
        # More than one commit -> measure from the latest commit's date;
        # otherwise measure from the PR's creation time.
        if commit_data['num_commits'] > 1:
            base_timestamp = commit_data['most_recent_date']
        else:
            base_timestamp = pr_data['created_at']

        last_status = get_status_data(pr_data['statuses_url'], args.system)
        if not last_status:
            continue
        diff = last_status['latest_datetime'] - base_timestamp
        if diff >= max_latency:
            continue
        output(pr_data['number'],
               base_timestamp,
               last_status['latest_datetime'],
               diff,
               last_status['successes'],
               last_status['failures'],
               last_status['errors'],
               mode=args.format)
199
200
# Run the measurement only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
203