1#!/usr/bin/env python
2# Copyright 2016 gRPC authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Tool to get build statistics from Jenkins and upload to BigQuery."""
16
17from __future__ import print_function
18
19import argparse
20import jenkinsapi
21from jenkinsapi.custom_exceptions import JenkinsAPIException
22from jenkinsapi.jenkins import Jenkins
23import json
24import os
25import re
26import sys
27import urllib
28
29gcp_utils_dir = os.path.abspath(
30    os.path.join(os.path.dirname(__file__), '../gcp/utils'))
31sys.path.append(gcp_utils_dir)
32import big_query_utils
33
34_PROJECT_ID = 'grpc-testing'
35_HAS_MATRIX = True
36_BUILDS = {
37    'gRPC_interop_master': not _HAS_MATRIX,
38    'gRPC_master_linux': not _HAS_MATRIX,
39    'gRPC_master_macos': not _HAS_MATRIX,
40    'gRPC_master_windows': not _HAS_MATRIX,
41    'gRPC_performance_master': not _HAS_MATRIX,
42    'gRPC_portability_master_linux': not _HAS_MATRIX,
43    'gRPC_portability_master_windows': not _HAS_MATRIX,
44    'gRPC_master_asanitizer_c': not _HAS_MATRIX,
45    'gRPC_master_asanitizer_cpp': not _HAS_MATRIX,
46    'gRPC_master_msan_c': not _HAS_MATRIX,
47    'gRPC_master_tsanitizer_c': not _HAS_MATRIX,
48    'gRPC_master_tsan_cpp': not _HAS_MATRIX,
49    'gRPC_interop_pull_requests': not _HAS_MATRIX,
50    'gRPC_performance_pull_requests': not _HAS_MATRIX,
51    'gRPC_portability_pull_requests_linux': not _HAS_MATRIX,
52    'gRPC_portability_pr_win': not _HAS_MATRIX,
53    'gRPC_pull_requests_linux': not _HAS_MATRIX,
54    'gRPC_pull_requests_macos': not _HAS_MATRIX,
55    'gRPC_pr_win': not _HAS_MATRIX,
56    'gRPC_pull_requests_asan_c': not _HAS_MATRIX,
57    'gRPC_pull_requests_asan_cpp': not _HAS_MATRIX,
58    'gRPC_pull_requests_msan_c': not _HAS_MATRIX,
59    'gRPC_pull_requests_tsan_c': not _HAS_MATRIX,
60    'gRPC_pull_requests_tsan_cpp': not _HAS_MATRIX,
61}
62_URL_BASE = 'https://grpc-testing.appspot.com/job'
63
64# This is a dynamic list where known and active issues should be added.
65# Fixed ones should be removed.
66# Also try not to add multiple messages from the same failure.
67_KNOWN_ERRORS = [
68    'Failed to build workspace Tests with scheme AllTests',
69    'Build timed out',
70    'TIMEOUT: tools/run_tests/pre_build_node.sh',
71    'TIMEOUT: tools/run_tests/pre_build_ruby.sh',
72    'FATAL: Unable to produce a script file',
73    'FAILED: build_docker_c\+\+',
74    'cannot find package \"cloud.google.com/go/compute/metadata\"',
75    'LLVM ERROR: IO failure on output stream.',
76    'MSBUILD : error MSB1009: Project file does not exist.',
77    'fatal: git fetch_pack: expected ACK/NAK',
78    'Failed to fetch from http://github.com/grpc/grpc.git',
79    ('hudson.remoting.RemotingSystemException: java.io.IOException: '
80     'Backing channel is disconnected.'),
81    'hudson.remoting.ChannelClosedException',
82    'Could not initialize class hudson.Util',
83    'Too many open files in system',
84    'FAILED: bins/tsan/qps_openloop_test GRPC_POLL_STRATEGY=epoll',
85    'FAILED: bins/tsan/qps_openloop_test GRPC_POLL_STRATEGY=legacy',
86    'FAILED: bins/tsan/qps_openloop_test GRPC_POLL_STRATEGY=poll',
87    ('tests.bins/asan/h2_proxy_test streaming_error_response '
88     'GRPC_POLL_STRATEGY=legacy'),
89    'hudson.plugins.git.GitException',
90    'Couldn\'t find any revision to build',
91    'org.jenkinsci.plugin.Diskcheck.preCheckout',
92    'Something went wrong while deleting Files',
93]
94_NO_REPORT_FILES_FOUND_ERROR = 'No test report files were found.'
95_UNKNOWN_ERROR = 'Unknown error'
96_DATASET_ID = 'build_statistics'
97
98
99def _scrape_for_known_errors(html):
100    error_list = []
101    for known_error in _KNOWN_ERRORS:
102        errors = re.findall(known_error, html)
103        this_error_count = len(errors)
104        if this_error_count > 0:
105            error_list.append({
106                'description': known_error,
107                'count': this_error_count
108            })
109            print('====> %d failures due to %s' % (this_error_count,
110                                                   known_error))
111    return error_list
112
113
def _no_report_files_found(html):
    """Tell whether `html` contains the 'no test report files' message."""
    marker = _NO_REPORT_FILES_FOUND_ERROR
    return marker in html
116
117
def _get_last_processed_buildnumber(build_name):
    """Look up the highest build number already stored for `build_name`.

    Runs a max(build_number) query, through the module-level `bq` client,
    against the table named `build_name` in the _DATASET_ID dataset.

    Args:
      build_name: Name used as the table name in the query.

    Returns:
      The largest stored build number as an int, or 0 when the query result
      holds an empty value (presumably because the table has no rows yet).
    """
    table_ref = (_PROJECT_ID, _DATASET_ID, build_name)
    query = 'SELECT max(build_number) FROM [%s:%s.%s];' % table_ref
    query_job = big_query_utils.sync_query_job(bq, _PROJECT_ID, query)
    request = bq.jobs().getQueryResults(
        pageToken=None, **query_job['jobReference'])
    page = request.execute(num_retries=3)
    # First field of the first row is the max(build_number) aggregate.
    max_build_number = page['rows'][0]['f'][0]['v']
    if not max_build_number:
        return 0
    return int(max_build_number)
128
129
def _process_matrix(build, url_base):
    """Gather the results of every matrix run belonging to `build`.

    Args:
      build: Object whose get_matrix_runs() yields runs exposing `name` and
        get_duration().
      url_base: URL of the parent build, without a trailing slash.

    Returns:
      A list with one dict per matrix run. Each dict holds 'name' (the
      comma-separated axis values taken from the run name), 'duration' in
      seconds, and every key returned by _process_build() for that run.
    """
    results = []
    for run in build.get_matrix_runs():
        # The axis values sit between the \xc2\xbb separator (the UTF-8 bytes
        # of U+00BB) and the ' #<number>' suffix of the run name.
        found = re.match('.*\\xc2\\xbb ((?:[^,]+,?)+) #.*', run.name)
        axes = found.groups()[0]
        values = axes.split(',')
        # Presumably ordered as config, language, platform - the URL
        # templates below rely on that order.
        url_args = (url_base, values[0], values[1], values[2])
        json_url = (
            '%s/config=%s,language=%s,platform=%s/testReport/api/json' %
            url_args)
        console_url = (
            '%s/config=%s,language=%s,platform=%s/consoleFull' % url_args)
        run_result = {
            'name': axes,
            'duration': run.get_duration().total_seconds()
        }
        run_result.update(_process_build(json_url, console_url))
        results.append(run_result)
    return results
148
149
def _fetch_url(url):
    """Return the raw body of `url`, closing the connection afterwards."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Close even when read() fails so the connection is not leaked.
        response.close()


def _process_build(json_url, console_url):
    """Summarize one build from its test report, or from its console log.

    The JSON test report at `json_url` is tried first. If it cannot be
    fetched or parsed, or lacks the expected counters, the build is treated
    as a Jenkins-level failure and the console output at `console_url` is
    scanned for known error signatures instead.

    Args:
      json_url: URL of the build's test report in JSON form.
      console_url: URL of the build's full console output.

    Returns:
      A dict with the keys 'pass_count', 'failure_count',
      'no_report_files_found' and 'error'. 'error' is a list of
      {'description': ..., 'count': ...} dicts: the known errors that were
      found, a single _UNKNOWN_ERROR entry when the build failed at the
      Jenkins level without a known signature, or a single empty entry
      otherwise.

    Raises:
      Whatever fetching `console_url` raises; a failure of the fallback
      fetch is deliberately not swallowed.
    """
    build_result = {}
    error_list = []
    try:
        html = _fetch_url(json_url)
        test_result = json.loads(html)
        print('====> Parsing result from %s' % json_url)
        failure_count = test_result['failCount']
        build_result['pass_count'] = test_result['passCount']
        build_result['failure_count'] = failure_count
        # This means Jenkins failure occurred.
        build_result['no_report_files_found'] = _no_report_files_found(html)
        # Only check errors if Jenkins failure occurred.
        if build_result['no_report_files_found']:
            error_list = _scrape_for_known_errors(html)
    except Exception as e:
        print('====> Got exception for %s: %s.' % (json_url, str(e)))
        print('====> Parsing errors from %s.' % console_url)
        html = _fetch_url(console_url)
        build_result['pass_count'] = 0
        build_result['failure_count'] = 1
        # In this case, the string doesn't exist in the result html but the
        # fact that we fail to parse the result html indicates Jenkins failure
        # and hence no report files were generated.
        build_result['no_report_files_found'] = True
        error_list = _scrape_for_known_errors(html)

    if not error_list:
        if build_result['no_report_files_found']:
            # Jenkins-level failure that matched no known signature.
            error_list = [{'description': _UNKNOWN_ERROR, 'count': 1}]
        else:
            error_list = [{'description': '', 'count': 0}]
    build_result['error'] = error_list

    return build_result
185
186
# Parse the command line.
argp = argparse.ArgumentParser(description='Get build statistics.')
argp.add_argument('-u', '--username', default='jenkins')
argp.add_argument(
    '-b',
    '--builds',
    choices=['all'] + sorted(_BUILDS.keys()),
    nargs='+',
    default=['all'])
args = argp.parse_args()

J = Jenkins('https://grpc-testing.appspot.com', args.username, 'apiToken')
# `bq` must stay a module-level name: _get_last_processed_buildnumber() reads
# it as a global.
bq = big_query_utils.create_big_query()

# 'all' anywhere in the selection means every known job.
if 'all' in args.builds:
    selected_builds = _BUILDS.keys()
else:
    selected_builds = args.builds

for build_name in selected_builds:
    print('====> Build: %s' % build_name)
    try:
        job = J[build_name]
    except Exception as e:
        print('====> Failed to get build %s: %s.' % (build_name, str(e)))
        continue
    last_processed_build_number = _get_last_processed_buildnumber(build_name)
    last_complete_build_number = job.get_last_completed_buildnumber()
    # Resume right after the last uploaded build, but never look further back
    # than the 10 latest completed builds. This avoids processing the whole
    # history of a job that has never been looked at.
    first_unprocessed = last_processed_build_number + 1
    oldest_of_latest_ten = last_complete_build_number - 9
    starting_build_number = max(first_unprocessed, oldest_of_latest_ten)
    for build_number in xrange(starting_build_number,
                               last_complete_build_number + 1):
        print('====> Processing %s build %d.' % (build_name, build_number))
        try:
            # Since get_last_completed_build() always fails due to
            # malformatted string error, we use get_build_metadata() instead.
            build = job.get_build_metadata(build_number)
            print('====> Build status: %s.' % build.get_status())
            if build.get_status() == 'ABORTED':
                continue
            # If any build is still running, stop processing this job. Next
            # time, we start from where it was left so that all builds are
            # processed sequentially.
            if build.is_running():
                print('====> Build %d is still running.' % build_number)
                break
        except KeyError:
            print('====> Build %s is missing. Skip.' % build_number)
            continue
        build_result = {
            'build_number': build_number,
            'timestamp': str(build.get_timestamp())
        }
        url_base = '%s/%s/%d' % (_URL_BASE, build_name, build_number)
        if _BUILDS[build_name]:
            # The build has matrix, such as gRPC_master.
            build_result['matrix'] = _process_matrix(build, url_base)
        else:
            json_url = '%s/testReport/api/json' % url_base
            console_url = '%s/consoleFull' % url_base
            build_result['duration'] = build.get_duration().total_seconds()
            build_result.update(_process_build(json_url, console_url))
        rows = [big_query_utils.make_row(build_number, build_result)]
        uploaded = big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET_ID,
                                               build_name, rows)
        if not uploaded:
            # Abort the whole run on a failed upload.
            print('====> Error uploading result to bigquery.')
            sys.exit(1)
253