1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Computes the metrics for functions, Chrome OS components and benchmarks."""
5
6from collections import defaultdict
7
8
def ComputeDistanceForFunction(child_functions_statistics_sample,
                               child_functions_statistics_reference):
  """Computes the distance metric for a function.

  Args:
    child_functions_statistics_sample: A dict that has as a key the name of a
      function and as a value the inclusive count fraction. The keys are
      the child functions of a sample parent function.
    child_functions_statistics_reference: A dict that has as a key the name of
      a function and as a value the inclusive count fraction. The keys are
      the child functions of a reference parent function.

  Returns:
    A float value equal to 1.0 plus the sum of the absolute inclusive count
    fraction differences of the child functions. If a child function is
    present in a single data set, then we consider the missing inclusive
    count fraction as 0. This value describes the difference in behaviour
    between a sample and the reference parent function; two identical inputs
    yield exactly 1.0.
  """
  # The distance starts at 1.0 rather than 0.0 so that it can never be zero:
  # ComputeScoreForFunction divides by it.
  distance = 1.0

  # Children known to the reference; a child missing from the sample counts
  # as a fraction of 0.0.
  for child_function, inclusive_count_fraction_reference in \
      child_functions_statistics_reference.items():
    inclusive_count_fraction_sample = \
        child_functions_statistics_sample.get(child_function, 0.0)
    distance += abs(inclusive_count_fraction_sample -
                    inclusive_count_fraction_reference)

  # Children that only appear in the sample; their reference fraction is 0.0,
  # so the whole sample fraction is added.
  for child_function, inclusive_count_fraction_sample in \
      child_functions_statistics_sample.items():
    if child_function not in child_functions_statistics_reference:
      distance += inclusive_count_fraction_sample

  return distance
49
50
def ComputeScoreForFunction(distance, reference_fraction, sample_fraction):
  """Computes the score for a function.

  The score is the product of the two inclusive count fractions divided by
  the distance.

  Args:
    distance: A float value representing the difference in behaviour between
      the sample and the reference function. It is used as the divisor.
    reference_fraction: A float value representing the inclusive count
      fraction of the reference function.
    sample_fraction: A float value representing the inclusive count
      fraction of the sample function.

  Returns:
    A float value representing the score of the function.
  """
  fractions_product = reference_fraction * sample_fraction
  return fractions_product / distance
66
67
def ComputeMetricsForComponents(cwp_function_groups, function_metrics):
  """Computes the metrics for a set of Chrome OS components.

  For every Chrome OS group, we compute the number of functions matching the
  group, the cumulative and average score, the cumulative and average distance
  of all those functions. A function matches a group if the path of the file
  containing its definition contains the common path describing the group.
  A function is attributed to the first matching group only.

  Args:
    cwp_function_groups: A dict having as a key the name of the group and as a
      value a common path describing the group. An iterable of
      (group name, common path) pairs is accepted as well.
    function_metrics: A dict having as a key the name of the function and the
      name of the file where it is declared concatenated by a ',', and as a
      value a tuple containing the distance and the score metrics.

  Returns:
    A dict containing as a key the name of the group and as a value a tuple
    with the group file path, the number of functions matching the group,
    the cumulative and average distance, cumulative and average score of all
    those functions. Groups without any matching function are not present.
  """
  # The default entry uses the same six-field layout as the stored entries:
  # (path, count, distance_cum, distance_avg, score_cum, score_avg).
  function_groups_metrics = defaultdict(lambda: ('', 0, 0.0, 0.0, 0.0, 0.0))

  # Support the documented dict form as well as a sequence of pairs.
  if isinstance(cwp_function_groups, dict):
    groups = list(cwp_function_groups.items())
  else:
    groups = cwp_function_groups

  for function_key, metric in function_metrics.items():
    _, function_file = function_key.split(',')

    for group, common_path in groups:
      if common_path not in function_file:
        continue

      function_distance = metric[0]
      function_score = metric[1]
      group_statistic = function_groups_metrics[group]

      function_count = group_statistic[1] + 1
      function_distance_cum = function_distance + group_statistic[2]
      function_distance_avg = function_distance_cum / float(function_count)
      function_score_cum = function_score + group_statistic[4]
      function_score_avg = function_score_cum / float(function_count)

      function_groups_metrics[group] = \
          (common_path,
           function_count,
           function_distance_cum,
           function_distance_avg,
           function_score_cum,
           function_score_avg)
      # Only the first matching group gets the function.
      break

  return function_groups_metrics
118
119
def ComputeMetricsForBenchmark(function_metrics):
  """Computes the aggregate distance and score metrics for a benchmark.

  Args:
    function_metrics: A dict whose values are (distance, score) pairs, one
      pair per entry. The keys are not inspected.

  Returns:
    A tuple with the number of entries, the cumulative and average distance,
    and the cumulative and average score. For an empty dict all values are
    zero, so that no division by zero occurs.
  """
  function_count = len(function_metrics)

  if not function_count:
    return 0, 0.0, 0.0, 0.0, 0.0

  distance_cum = 0.0
  score_cum = 0.0

  for distance, score in function_metrics.values():
    distance_cum += distance
    score_cum += score

  distance_avg = distance_cum / float(function_count)
  score_avg = score_cum / float(function_count)
  return function_count, distance_cum, distance_avg, score_cum, score_avg
134
135
def ComputeFunctionCountForBenchmarkSet(set_function_metrics, cwp_functions,
                                        metric_string):
  """Computes the function count metric pair for the benchmark set.

     For the function count metric, we count the unique functions covered by the
     set of benchmarks. We compute the fraction of unique functions out
     of the amount of CWP functions given.

     We compute also the same metric pair for every group from the keys of the
     set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a group
      and as value a list of functions that match the given group.
    cwp_functions: A dict having as a key the name of the groups and as a value
      the list of CWP functions that match an individual group.
    metric_string: A tuple of strings that will be mapped to the tuple of metric
      values in the returned function group dict. This is done for convenience
      for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    function count and the second value the function count fraction.
    The dict has as a key the name of the group and as a value a dict that
    maps the metric_string  to the values of the metric pair of the group.
    A fraction is 0.0 whenever there are no CWP functions to compare against
    (unknown group, empty group, or no CWP functions at all).
  """
  cwp_functions_count = sum(len(functions)
                            for functions in cwp_functions.values())

  # Merge the functions of every benchmark per group, dropping duplicates.
  set_groups_functions = defaultdict(set)
  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions in benchmark_function_metrics.items():
      set_groups_functions[group_name] |= set(functions)

  set_groups_functions_count = {}
  set_functions_count = 0
  for group_name, functions in set_groups_functions.items():
    set_group_functions_count = len(functions)
    cwp_group_functions_count = len(cwp_functions.get(group_name, ()))

    # Guard the division: the group may be unknown to CWP or have no
    # CWP functions.
    if cwp_group_functions_count:
      set_group_functions_fraction = \
          set_group_functions_count / float(cwp_group_functions_count)
    else:
      set_group_functions_fraction = 0.0

    set_groups_functions_count[group_name] = {
        metric_string[0]: set_group_functions_count,
        metric_string[1]: set_group_functions_fraction}
    set_functions_count += set_group_functions_count

  if cwp_functions_count:
    set_functions_count_fraction = \
        set_functions_count / float(cwp_functions_count)
  else:
    set_functions_count_fraction = 0.0

  return (set_functions_count, set_functions_count_fraction), \
      set_groups_functions_count
190
191
def ComputeDistanceForBenchmarkSet(set_function_metrics, cwp_functions,
                                   metric_string):
  """Computes the distance variation metric pair for the benchmark set.

     For the distance variation metric, we compute the sum of the distance
     variations of the functions covered by a set of benchmarks.
     We define the distance variation as the difference between the distance
     value of a functions and the ideal distance value (1.0).
     If a function appears in multiple common functions files, we consider
     only the minimum value. We compute also the distance variation per
     function.

     In addition, we compute also the same metric pair for every group from
     the keys of the set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a group
      and as value a dict that maps every function matching the group to a
      sequence of metrics whose first element is the distance.
    cwp_functions: A dict having as a key the name of the groups and as a value
      the list of CWP functions that match an individual group. It is not
      used by this metric and is only kept for a uniform signature.
    metric_string: A tuple of strings that will be mapped to the tuple of metric
      values in the returned function group dict. This is done for convenience
      for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    distance variation per function and the second value the distance variation.
    The dict has as a key the name of the group and as a value a dict that
    maps the metric_string to the values of the metric pair of the group.
    When no function is covered at all, the metric pair is (0.0, 0.0).
  """
  # Smallest distance seen for every function of every group; unseen
  # functions start at +inf so that the first real value always wins.
  set_unique_functions = defaultdict(lambda: defaultdict(lambda: float('inf')))
  set_function_count = 0
  total_distance_variation = 0.0

  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions_metrics in benchmark_function_metrics.items():
      for function_key, metrics in functions_metrics.items():
        # The group entry is created here, on the first function, so a group
        # without functions never shows up with a zero function count.
        previous_distance = set_unique_functions[group_name][function_key]
        set_unique_functions[group_name][function_key] = \
            min(metrics[0], previous_distance)

  # The default has the same shape as the stored per-group values.
  groups_distance_variations = defaultdict(
      lambda: {metric_string[0]: 0.0, metric_string[1]: 0.0})

  for group_name, functions_distances in set_unique_functions.items():
    group_function_count = len(functions_distances)
    # Every function contributes (distance - 1.0) to the variation.
    group_distance_variation = \
        sum(functions_distances.values()) - float(group_function_count)
    total_distance_variation += group_distance_variation
    set_function_count += group_function_count
    groups_distance_variations[group_name] = \
        {metric_string[0]:
         group_distance_variation / float(group_function_count),
         metric_string[1]: group_distance_variation}

  if set_function_count:
    distance_variation_per_function = \
        total_distance_variation / float(set_function_count)
  else:
    distance_variation_per_function = 0.0

  return (distance_variation_per_function,
          total_distance_variation), groups_distance_variations
248
249
def ComputeScoreForBenchmarkSet(set_function_metrics, cwp_functions,
                                metric_string):
  """Computes the score metric pair for the benchmark set.

     For the score metric, we compute the sum of the scores of the functions
     from a set of benchmarks. If a function appears in multiple common
     functions files, we consider only the maximum value. We compute also the
     fraction of this sum from the sum of all the scores of the functions from
     the CWP data covering the given groups, in the ideal case (the ideal
     score of a function is 1.0).

     In addition, we compute the same metric pair for every group from the
     keys of the set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a group
      and as value a dict that maps every function matching the group to a
      sequence of metrics whose second element is the score.
    cwp_functions: A dict having as a key the name of the groups and as a value
      the list of CWP functions that match an individual group.
    metric_string: A tuple of strings that will be mapped to the tuple of metric
      values in the returned function group dict. This is done for convenience
      for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the pair is the fraction of the sum
    of the scores from the ideal case and the second value represents the
    sum of scores of the functions. The dict has as a key the name of the group
    and as a value a dict that maps the metric_string to the values of the
    metric pair of the group. A fraction is 0.0 whenever there are no CWP
    functions to compare against (unknown group, empty group, or no CWP
    functions at all).
  """
  cwp_functions_count = sum(len(functions)
                            for functions in cwp_functions.values())
  # Largest score seen for every function of every group; unseen functions
  # start at 0.0.
  set_unique_functions = defaultdict(lambda: defaultdict(lambda: 0.0))
  total_score = 0.0

  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions_metrics in benchmark_function_metrics.items():
      for function_key, metrics in functions_metrics.items():
        previous_score = set_unique_functions[group_name][function_key]
        set_unique_functions[group_name][function_key] = \
            max(metrics[1], previous_score)

  # The default has the same shape as the stored per-group values.
  groups_scores = defaultdict(
      lambda: {metric_string[0]: 0.0, metric_string[1]: 0.0})

  for group_name, function_scores in set_unique_functions.items():
    group_function_count = len(cwp_functions.get(group_name, ()))
    group_score = sum(function_scores.values())
    total_score += group_score

    # Guard the division: the group may be unknown to CWP or have no
    # CWP functions.
    if group_function_count:
      group_score_fraction = group_score / float(group_function_count)
    else:
      group_score_fraction = 0.0

    groups_scores[group_name] = {
        metric_string[0]: group_score_fraction,
        metric_string[1]: group_score
    }

  if cwp_functions_count:
    total_score_fraction = total_score / float(cwp_functions_count)
  else:
    total_score_fraction = 0.0

  return (total_score_fraction, total_score), groups_scores
307