# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Computes the metrics for functions, Chrome OS components and benchmarks."""

from collections import defaultdict


def ComputeDistanceForFunction(child_functions_statistics_sample,
                               child_functions_statistics_reference):
  """Computes the distance metric for a function.

  Args:
    child_functions_statistics_sample: A dict that has as a key the name of a
      function and as a value the inclusive count fraction. The keys are
      the child functions of a sample parent function.
    child_functions_statistics_reference: A dict that has as a key the name of
      a function and as a value the inclusive count fraction. The keys are
      the child functions of a reference parent function.

  Returns:
    A float value representing 1.0 plus the sum of inclusive count fraction
    differences of pairs of common child functions. If a child function is
    present in a single data set, then we consider the missing inclusive
    count fraction as 0. This value describes the difference in behaviour
    between a sample and the reference parent function; identical inputs
    yield exactly 1.0.
  """
  # The distance starts at 1.0 rather than 0.0 so that it can later be used
  # as a divisor (see ComputeScoreForFunction) without dividing by zero.
  distance = 1.0

  # Children known to the reference; a child missing from the sample
  # contributes its whole reference fraction.
  for child_function, reference_fraction in \
      child_functions_statistics_reference.items():
    sample_fraction = \
        child_functions_statistics_sample.get(child_function, 0.0)
    distance += abs(sample_fraction - reference_fraction)

  # Children that appear only in the sample contribute their own fraction.
  for child_function, sample_fraction in \
      child_functions_statistics_sample.items():
    if child_function not in child_functions_statistics_reference:
      distance += sample_fraction

  return distance


def ComputeScoreForFunction(distance, reference_fraction, sample_fraction):
  """Computes the score for a function.

  Args:
    distance: A float value representing the difference in behaviour between
      the sample and the reference function. Must be non-zero.
    reference_fraction: A float value representing the inclusive count
      fraction of the reference function.
    sample_fraction: A float value representing the inclusive count
      fraction of the sample function.

  Returns:
    A float value representing the score of the function, i.e. the product
    of the two fractions divided by the distance.

  Raises:
    ZeroDivisionError: If distance is 0.
  """
  # float() keeps the division a true division even for integer arguments.
  return reference_fraction * sample_fraction / float(distance)


def ComputeMetricsForComponents(cwp_function_groups, function_metrics):
  """Computes the metrics for a set of Chrome OS components.

  For every Chrome OS group, we compute the number of functions matching the
  group, the cumulative and average score, the cumulative and average distance
  of all those functions. A function matches a group if the path of the file
  containing its definition contains the common path describing the group.
  A function is accounted to the first matching group only.

  Args:
    cwp_function_groups: Either a dict having as a key the name of the group
      and as a value a common path describing the group, or an iterable of
      (group name, common path) pairs.
    function_metrics: A dict having as a key the name of the function and the
      name of the file where it is declared concatenated by a ',', and as a
      value a tuple containing the distance and the score metrics.

  Returns:
    A defaultdict containing as a key the name of the group and as a value a
    tuple with the group file path, the number of functions matching the
    group, the cumulative and average distance, the cumulative and average
    score of all those functions. Groups without any matching function are
    not present in the result.

  Raises:
    ValueError: If a key of function_metrics does not contain exactly one ','.
  """
  # Materialize the groups once: this supports both the documented dict form
  # and a sequence of pairs, and lets the groups be scanned for every function.
  if isinstance(cwp_function_groups, dict):
    groups = list(cwp_function_groups.items())
  else:
    groups = list(cwp_function_groups)

  # The default has the same layout as the stored tuples:
  # (path, count, distance_cum, distance_avg, score_cum, score_avg).
  function_groups_metrics = defaultdict(
      lambda: ('', 0, 0.0, 0.0, 0.0, 0.0))

  for function_key, metric in function_metrics.items():
    _, function_file = function_key.split(',')
    function_distance = metric[0]
    function_score = metric[1]

    for group, common_path in groups:
      if common_path not in function_file:
        continue

      _, previous_count, previous_distance_cum, _, previous_score_cum, _ = \
          function_groups_metrics[group]

      function_count = previous_count + 1
      function_distance_cum = function_distance + previous_distance_cum
      function_score_cum = function_score + previous_score_cum

      function_groups_metrics[group] = (
          common_path,
          function_count,
          function_distance_cum,
          function_distance_cum / float(function_count),
          function_score_cum,
          function_score_cum / float(function_count))
      # Only the first matching group accounts for the function.
      break

  return function_groups_metrics


def ComputeMetricsForBenchmark(function_metrics):
  """Computes the aggregated metrics of all the functions of a benchmark.

  Args:
    function_metrics: A dict having as a value a tuple containing the distance
      and the score metrics of a function. The keys are not inspected.

  Returns:
    A tuple containing the number of functions, the cumulative and average
    distance and the cumulative and average score of the functions. If
    function_metrics is empty, the count is 0 and all the other values are
    0.0.
  """
  function_count = len(function_metrics)

  # Without functions there is nothing to average; avoid dividing by zero.
  if not function_count:
    return 0, 0.0, 0.0, 0.0, 0.0

  distance_cum = 0.0
  score_cum = 0.0

  for distance, score in function_metrics.values():
    distance_cum += distance
    score_cum += score

  distance_avg = distance_cum / float(function_count)
  score_avg = score_cum / float(function_count)
  return function_count, distance_cum, distance_avg, score_cum, score_avg


def ComputeFunctionCountForBenchmarkSet(set_function_metrics, cwp_functions,
                                        metric_string):
  """Computes the function count metric pair for the benchmark set.

  For the function count metric, we count the unique functions covered by the
  set of benchmarks. We compute the fraction of unique functions out
  of the amount of CWP functions given.

  We compute also the same metric pair for every group from the keys of the
  set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a group
      and as value an iterable of the functions that match the given group
      (for a dict value, its keys are used).
    cwp_functions: A dict having as a key the name of the groups and as a value
      the list of CWP functions that match an individual group.
    metric_string: A tuple of strings that will be mapped to the tuple of metric
      values in the returned function group dict. This is done for convenience
      for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    function count and the second value the function count fraction.
    The dict has as a key the name of the group and as a value a dict that
    maps the metric_string to the values of the metric pair of the group.
    A fraction is 0.0 whenever there are no CWP functions to compare against.
  """
  cwp_functions_count = sum(
      len(functions) for functions in cwp_functions.values())

  # Union of the functions covered by all the benchmarks, per group.
  set_groups_functions = defaultdict(set)
  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions in benchmark_function_metrics.items():
      set_groups_functions[group_name].update(functions)

  set_groups_functions_count = {}
  set_functions_count = 0
  for group_name, functions in set_groups_functions.items():
    set_group_functions_count = len(functions)
    cwp_group_functions_count = len(cwp_functions.get(group_name, ()))

    # A group unknown to the CWP data, or with no CWP functions, has no
    # meaningful fraction; report 0.0 instead of failing.
    if cwp_group_functions_count:
      group_fraction = \
          set_group_functions_count / float(cwp_group_functions_count)
    else:
      group_fraction = 0.0

    set_groups_functions_count[group_name] = {
        metric_string[0]: set_group_functions_count,
        metric_string[1]: group_fraction
    }
    set_functions_count += set_group_functions_count

  if cwp_functions_count:
    set_functions_count_fraction = \
        set_functions_count / float(cwp_functions_count)
  else:
    set_functions_count_fraction = 0.0

  return (set_functions_count, set_functions_count_fraction), \
      set_groups_functions_count


def ComputeDistanceForBenchmarkSet(set_function_metrics, cwp_functions,
                                   metric_string):
  """Computes the distance variation metric pair for the benchmark set.

  For the distance variation metric, we compute the sum of the distance
  variations of the functions covered by a set of benchmarks.
  We define the distance variation as the difference between the distance
  value of a function and the ideal distance value (1.0).
  If a function appears in multiple common functions files, we consider
  only the minimum value. We compute also the distance variation per
  function.

  In addition, we compute also the same metric pair for every group from
  the keys of the set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a group
      and as value a dict that maps the functions matching the given group to
      a tuple containing the distance and the score metrics.
    cwp_functions: A dict having as a key the name of the groups and as a value
      the list of CWP functions that match an individual group. It is not
      used by this metric and is kept for a signature parallel to the other
      benchmark set metrics.
    metric_string: A tuple of strings that will be mapped to the tuple of metric
      values in the returned function group dict. This is done for convenience
      for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    distance variation per function and the second value the distance variation.
    The dict has as a key the name of the group and as a value a dict that
    maps the metric_string to the values of the metric pair of the group.
    If no function is covered, the metric pair is (0.0, 0.0).
  """
  # Minimum distance seen for every function, per group. A group is created
  # only once it has at least one function.
  set_unique_functions = defaultdict(lambda: defaultdict(lambda: float('inf')))
  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions in benchmark_function_metrics.items():
      for function_key, metrics in functions.items():
        group_distances = set_unique_functions[group_name]
        group_distances[function_key] = \
            min(metrics[0], group_distances[function_key])

  set_function_count = 0
  total_distance_variation = 0.0
  # The default mirrors the layout of the stored per-group dicts.
  groups_distance_variations = defaultdict(
      lambda: {metric_string[0]: 0.0, metric_string[1]: 0.0})

  for group_name, functions_distances in set_unique_functions.items():
    group_function_count = len(functions_distances)
    # Every function ideally has a distance of 1.0, so the variation is the
    # sum of the distances minus the number of functions.
    group_distance_variation = \
        sum(functions_distances.values()) - float(group_function_count)
    total_distance_variation += group_distance_variation
    set_function_count += group_function_count
    groups_distance_variations[group_name] = {
        metric_string[0]:
            group_distance_variation / float(group_function_count),
        metric_string[1]: group_distance_variation
    }

  if set_function_count:
    distance_variation_per_function = \
        total_distance_variation / float(set_function_count)
  else:
    distance_variation_per_function = 0.0

  return (distance_variation_per_function,
          total_distance_variation), groups_distance_variations


def ComputeScoreForBenchmarkSet(set_function_metrics, cwp_functions,
                                metric_string):
  """Computes the score metric pair for the benchmark set.

  For the score metric, we compute the sum of the scores of the functions
  from a set of benchmarks. If a function appears in multiple common
  functions files, we consider only the maximum value. We compute also the
  fraction of this sum from the sum of all the scores of the functions from
  the CWP data covering the given groups, in the ideal case (the ideal
  score of a function is 1.0).

  In addition, we compute the same metric pair for every group from the
  keys of the set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a group
      and as value a dict that maps the functions matching the given group to
      a tuple containing the distance and the score metrics.
    cwp_functions: A dict having as a key the name of the groups and as a value
      the list of CWP functions that match an individual group.
    metric_string: A tuple of strings that will be mapped to the tuple of metric
      values in the returned function group dict. This is done for convenience
      for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the pair is the fraction of the sum
    of the scores from the ideal case and the second value represents the
    sum of scores of the functions. The dict has as a key the name of the group
    and as a value a dict that maps the metric_string to the values of the
    metric pair of the group. A fraction is 0.0 whenever there are no CWP
    functions to compare against.
  """
  cwp_functions_count = sum(
      len(functions) for functions in cwp_functions.values())

  # Maximum score seen for every function, per group. Scores are compared
  # against an initial 0.0, so a function never scores below 0.0.
  set_unique_functions = defaultdict(lambda: defaultdict(lambda: 0.0))
  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions in benchmark_function_metrics.items():
      for function_key, metrics in functions.items():
        group_scores = set_unique_functions[group_name]
        group_scores[function_key] = \
            max(metrics[1], group_scores[function_key])

  total_score = 0.0
  # The default mirrors the layout of the stored per-group dicts.
  groups_scores = defaultdict(
      lambda: {metric_string[0]: 0.0, metric_string[1]: 0.0})

  for group_name, function_scores in set_unique_functions.items():
    cwp_group_functions_count = len(cwp_functions.get(group_name, ()))
    group_score = sum(function_scores.values())
    total_score += group_score

    # Same convention as the function count metric: a group without CWP
    # functions gets a fraction of 0.0.
    if cwp_group_functions_count:
      group_score_fraction = group_score / float(cwp_group_functions_count)
    else:
      group_score_fraction = 0.0

    groups_scores[group_name] = {
        metric_string[0]: group_score_fraction,
        metric_string[1]: group_score
    }

  if cwp_functions_count:
    total_score_fraction = total_score / float(cwp_functions_count)
  else:
    total_score_fraction = 0.0

  return (total_score_fraction, total_score), groups_scores