#!/usr/bin/env python
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Generate bench_expectations file from a given set of bench data files. """

import argparse
import bench_util
import contextlib
import json
import os
import re
import sys
import urllib2

# Parameters for calculating bench ranges.
RANGE_RATIO_UPPER = 1.5  # Ratio of range for upper bounds.
RANGE_RATIO_LOWER = 2.0  # Ratio of range for lower bounds.
ERR_RATIO = 0.08  # Further widens the range by the ratio of average value.
ERR_UB = 1.0  # Adds an absolute upper error to cope with small benches.
ERR_LB = 1.5  # Absolute error subtracted from the lower bound.

# List of bench configs to monitor. Ignore all other configs.
CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',
                      'simple_viewport_1000x1000_angle',
                      'simple_viewport_1000x1000_gpu',
                      'simple_viewport_1000x1000_scalar_1.100000',
                      'simple_viewport_1000x1000_scalar_1.100000_gpu',
                     ]

# List of flaky entries that should be excluded. Each entry is defined by a list
# of 3 strings, corresponding to the substrings of [bench, config, builder] to
# search for. A bench expectations line is excluded when each of the 3 strings
# in the list is a substring of the corresponding element of the given line. For
# instance, ['desk_yahooanswers', 'gpu', 'Ubuntu'] will skip expectation entries
# of SKP benchs whose name contains 'desk_yahooanswers' on all gpu-related
# configs of all Ubuntu builders.
ENTRIES_TO_EXCLUDE = [
                     ]

_GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'

# Prefix that precedes the JSON payload returned by the commit-log endpoint.
# It has to be removed before the body can be handed to json.loads().
_JSON_RESPONSE_PREFIX = ")]}'"


def compute_ranges(benches, more_benches=None):
  """Given a list of bench numbers, calculate the alert range.

  Args:
    benches: a non-empty list of float bench values.
    more_benches: a list of (commits_back, values) tuples of additional bench
      values. The first value of each tuple is the number of commits before
      the current one that set of values is at, and the second value is a
      list of bench results. NOTE: this argument is accepted but not yet
      used in the calculation.

  Returns:
    a list of float [lower_bound, upper_bound].

  Raises:
    ValueError: if benches is empty.
  """
  if not benches:
    raise ValueError('compute_ranges requires at least one bench value.')

  # float() keeps the average exact even if integer samples are passed in
  # (plain '/' floors on Python 2 ints).
  avg = sum(benches) / float(len(benches))
  minimum = min(benches)
  maximum = max(benches)
  diff = maximum - minimum

  return [minimum - diff*RANGE_RATIO_LOWER - avg*ERR_RATIO - ERR_LB,
          maximum + diff*RANGE_RATIO_UPPER + avg*ERR_RATIO + ERR_UB]


def create_expectations_dict(revision_data_points, builder, extra_data=None):
  """Convert list of bench data points into a dictionary of expectations data.

  Args:
    revision_data_points: a list of BenchDataPoint objects.
    builder: string of the corresponding buildbot builder name.
    extra_data: optional list of (idx, data_points) tuples, where idx is the
      position of a commit in the parent-commit list and data_points is the
      list of bench data points parsed for that commit. None means no extra
      data.

  Returns:
    a dictionary of this form:
      keys = tuple of (config, bench) strings.
      values = list of float [expected, lower_bound, upper_bound] for the key.

  Raises:
    Exception: if two data points map to the same (config, bench) key.
  """
  if extra_data is None:
    extra_data = []

  bench_dict = {}
  for point in revision_data_points:
    if (point.time_type or  # Not walltime which has time_type ''
        point.config not in CONFIGS_TO_INCLUDE):
      continue

    excluded = any(
        bench_substr in point.bench and config_substr in point.config and
        builder_substr in builder
        for bench_substr, config_substr, builder_substr in ENTRIES_TO_EXCLUDE)
    if excluded:
      continue

    key = (point.config, point.bench)
    # Fail before scanning the extra data; a duplicate is fatal either way.
    if key in bench_dict:
      raise Exception('Duplicate bench entry: ' + str(key))

    # Collect the samples of the same bench/config/time_type from the
    # additional commits, skipping entries with no per-iteration times.
    extras = []
    for idx, dataset in extra_data:
      for data in dataset:
        if (data.bench == point.bench and data.config == point.config and
            data.time_type == point.time_type and data.per_iter_time):
          extras.append((idx, data.per_iter_time))

    bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time, extras)

  return bench_dict


def get_parent_commits(start_hash, num_back):
  """Returns the commit hashes listed by the git log starting at start_hash.

  Args:
    start_hash: string git hash at which the log query starts.
    num_back: maximum number of log entries to request.

  Returns:
    a list of commit hash strings, in the order the server returns them.
  """
  url = ('https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %
         (start_hash, int(num_back)))
  # Close the connection as soon as the body has been read.
  with contextlib.closing(urllib2.urlopen(url)) as response:
    body = response.read()
  # The server prepends a few non-JSON characters to the payload. Strip them
  # only when they are really there instead of blindly dropping 4 characters.
  if body.startswith(_JSON_RESPONSE_PREFIX):
    body = body[len(_JSON_RESPONSE_PREFIX):]
  json_data = json.loads(body)
  return [revision['commit'] for revision in json_data['log']]


def get_file_suffixes(commit_hash, directory):
  """Gets all the bench data file suffixes available in the directory.

  Args:
    commit_hash: string git hash the bench data files belong to.
    directory: path of the directory to scan.

  Returns:
    a list with the part of each file name that follows
    'bench_<commit_hash>_data_'.
  """
  prefix = 'bench_' + commit_hash + '_data_'
  return [name[len(prefix):] for name in os.listdir(directory)
          if name.startswith(prefix)]


def download_bench_data(builder, commit_hash, suffixes, directory):
  """Downloads data, returns the number successfully downloaded.

  Files already present in directory are skipped and not counted. Files the
  server answers with an HTTP error for are skipped silently.

  Args:
    builder: string builder name used to build the download URL.
    commit_hash: string git hash whose bench data should be fetched.
    suffixes: list of file name suffixes to try.
    directory: path of the directory the files are written to.

  Returns:
    the number of files that were newly downloaded.
  """
  existing_files = set(os.listdir(directory))
  count = 0
  for suffix in suffixes:
    file_name = 'bench_' + commit_hash + '_data_' + suffix
    if file_name in existing_files:
      continue
    try:
      src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))
    except urllib2.HTTPError:
      # Best effort: not every commit has data for every suffix.
      continue
    # Binary mode stores the bytes exactly as served; closing() releases the
    # HTTP connection even if the write fails.
    with contextlib.closing(src):
      with open(os.path.join(directory, file_name), 'wb') as dest:
        dest.writelines(src)
    count += 1
  return count


def main():
  """Reads bench data points, then calculate and export expectations.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-a', '--representation_alg', default='25th',
      help='bench representation algorithm to use, see bench_util.py.')
  parser.add_argument(
      '-b', '--builder', required=True,
      help='name of the builder whose bench ranges we are computing.')
  parser.add_argument(
      '-d', '--input_dir', required=True,
      help='a directory containing bench data files.')
  parser.add_argument(
      '-o', '--output_file', required=True,
      help='file path and name for storing the output bench expectations.')
  parser.add_argument(
      '-r', '--git_revision', required=True,
      help='the git hash to indicate the revision of input data to use.')
  # type=int: without it a value given on the command line arrives as a
  # string and breaks the '%d' URL formatting and the list slicing below.
  parser.add_argument(
      '-t', '--back_track', required=False, default=10, type=int,
      help='the number of commit hashes backwards to look to include ' +
           'in the calculations.')
  parser.add_argument(
      '-m', '--max_commits', required=False, default=1, type=int,
      help='the number of commit hashes to include in the calculations.')
  args = parser.parse_args()

  builder = args.builder

  data_points = bench_util.parse_skp_bench_data(
      args.input_dir, args.git_revision, args.representation_alg)

  parent_commits = get_parent_commits(args.git_revision, args.back_track)
  print('Using commits: {}'.format(parent_commits))
  suffixes = get_file_suffixes(args.git_revision, args.input_dir)
  print('Using suffixes: {}'.format(suffixes))

  # TODO(kelvinly): Find a better approach to than directly copying from
  # the GS server?
  downloaded_commits = []
  for idx, commit in enumerate(parent_commits):
    num_downloaded = download_bench_data(
        builder, commit, suffixes, args.input_dir)
    if num_downloaded > 0:
      downloaded_commits.append((num_downloaded, idx, commit))

  if len(downloaded_commits) < args.max_commits:
    print('Less than desired number of commits found. Please increase '
          '--back_track in later runs')
  # Tuples sort by (num_downloaded, idx, commit), so the commits with the
  # most downloaded files come first.
  trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]
  extra_data = []
  for _, idx, commit in trunc_commits:
    extra_data.append((idx, bench_util.parse_skp_bench_data(
        args.input_dir, commit, args.representation_alg)))

  expectations_dict = create_expectations_dict(data_points, builder,
                                               extra_data)

  out_lines = []
  for (config, bench) in sorted(expectations_dict):
    (expected, lower_bound, upper_bound) = expectations_dict[(config, bench)]
    out_lines.append('%(bench)s_%(config)s_,%(builder)s-%(representation)s,'
                     '%(expected)s,%(lower_bound)s,%(upper_bound)s' % {
                         'bench': bench,
                         'config': config,
                         'builder': builder,
                         'representation': args.representation_alg,
                         'expected': expected,
                         'lower_bound': lower_bound,
                         'upper_bound': upper_bound})

  with open(args.output_file, 'w') as file_handle:
    file_handle.write('\n'.join(out_lines))


if __name__ == "__main__":
  main()