#!/usr/bin/python2
#
# Copyright 2019 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Helper script that takes as input 2 CSVs downloaded from perf.skia.org and
# outputs a CSV with test_name, avg_value1 (from CSV1), avg_value2 (from CSV2),
# and perc_diff, the percentage difference between avg_value1 and avg_value2.
# Before averaging, this script also discards the NUM_OUTLIERS_TO_REMOVE min
# values and the NUM_OUTLIERS_TO_REMOVE max values of each test.
13
14
import csv
import optparse
import re
import sys
19
20
# Placeholder written to the output CSV when a test is absent from one of the
# two inputs (and for the percentage difference that cannot be computed then).
MISSING_STR = 'N/A'
# Number of smallest samples and number of largest samples dropped from each
# test's values before they are averaged.
NUM_OUTLIERS_TO_REMOVE = 2
23
24
def read_from_csv(csv_file, num_outliers=None):
  """Returns a dict mapping each test name in csv_file to its trimmed average.

  The first row must be a header whose first cell is 'id'. In every other row
  the first cell is a trace id that ends with a comma and contains ',test=';
  the test name is the text between the last ',test=' and that trailing comma.
  The remaining cells are samples. Per row the samples are sorted, the
  num_outliers smallest and the num_outliers largest are discarded, and the
  rest are averaged. Blank rows and blank sample cells are ignored. If a test
  name appears in several rows, the last row wins.

  Args:
    csv_file: Path of the CSV file to read.
    num_outliers: Number of samples to discard from each end of the sorted
        samples. Defaults to NUM_OUTLIERS_TO_REMOVE.

  Returns:
    A dict of test name (str) to average value (float).

  Raises:
    Exception: If the file is empty or its header does not start with 'id'.
    ValueError: If a trace id has no test name, a sample is not a number, or a
        row does not have more than 2 * num_outliers samples.
  """
  if num_outliers is None:
    num_outliers = NUM_OUTLIERS_TO_REMOVE

  # The csv module wants a binary file on Python 2 and a text file opened with
  # newline='' on Python 3.
  if sys.version_info[0] >= 3:
    f = open(csv_file, 'r', newline='')
  else:
    f = open(csv_file, 'rb')

  test_to_avg = {}
  test_name_re = re.compile(r'^.*,test=(.*),$')
  with f:
    csv_reader = csv.reader(f, delimiter=',')
    # First row should contain headers. Validate that it does.
    header_row = next(csv_reader, None)
    if not header_row or header_row[0] != 'id':
      raise Exception('%s in unexpected format' % csv_file)

    for row in csv_reader:
      if not row:
        # csv.reader yields an empty list for a blank line.
        continue

      # Extract the test name.
      match = test_name_re.search(row[0])
      if match is None:
        raise ValueError(
            '%s: could not find a test name in %r' % (csv_file, row[0]))
      test_name = match.group(1)

      vals = sorted(float(cell) for cell in row[1:] if cell.strip())
      if len(vals) <= 2 * num_outliers:
        raise ValueError(
            '%s: test %s has %d value(s); more than %d are needed to discard '
            'outliers' % (csv_file, test_name, len(vals), 2 * num_outliers))
      # Discard outliers. The end index is spelled out because vals[n:-n] is
      # empty when n is 0.
      vals = vals[num_outliers:len(vals) - num_outliers]
      # Find the avg val.
      test_to_avg[test_name] = sum(vals) / float(len(vals))
  return test_to_avg
47
48
def combine_results(d1, d2, missing=None):
  """Merges two test-name-to-average dicts into one row dict per test.

  For a test present in both inputs, perc_diff is the difference (d2 value
  minus d1 value) expressed as a percentage of the mean of the two values; it
  is 0 when that mean is 0. For a test present in only one input, the other
  value and perc_diff are set to the missing placeholder.

  Args:
    d1: Dict of test name to average value from the first CSV.
    d2: Dict of test name to average value from the second CSV.
    missing: Placeholder for absent values. Defaults to MISSING_STR.

  Returns:
    A dict of test name to a dict with the keys 'test_name', 'csv1', 'csv2'
    and 'perc_diff'.
  """
  if missing is None:
    missing = MISSING_STR

  test_to_result = {}
  # Tests of d1 first, then the tests that only d2 has.
  all_tests = list(d1) + [test for test in d2 if test not in d1]
  for test in all_tests:
    v1 = d1.get(test, missing)
    v2 = d2.get(test, missing)
    perc_diff = missing
    # Decide by key membership rather than by comparing values against the
    # placeholder.
    if test in d1 and test in d2:
      diff = v2 - v1
      # 2.0 keeps this a true division on Python 2 even for integer inputs.
      avg = (v2 + v1) / 2.0
      perc_diff = 0 if avg == 0 else diff / avg * 100
    test_to_result[test] = {
        'test_name': test,
        'csv1': v1,
        'csv2': v2,
        'perc_diff': perc_diff,
    }
  return test_to_result
78
79
def write_to_csv(output_dict, output_csv):
  """Writes the combined results to output_csv, one row per test.

  The file starts with the header row test_name,csv1,csv2,perc_diff and the
  rows follow in sorted order of the keys of output_dict.

  Args:
    output_dict: Dict of test name to a row dict with the keys 'test_name',
        'csv1', 'csv2' and 'perc_diff'.
    output_csv: Path of the CSV file to create or overwrite.
  """
  fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']
  # The csv module wants a binary file on Python 2 and a text file opened with
  # newline='' on Python 3; otherwise Windows output gets '\r\r\n' line ends.
  if sys.version_info[0] >= 3:
    f = open(output_csv, 'w', newline='')
  else:
    f = open(output_csv, 'wb')
  with f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    # sorted() works on both Python 2 and 3; dict.keys() has no sort() on 3.
    for test in sorted(output_dict):
      writer.writerow(output_dict[test])
89
90
def parse_and_output(csv1, csv2, output_csv):
  """Compares the per-test averages of two CSVs and writes the result.

  Args:
    csv1: Path of the first input CSV.
    csv2: Path of the second input CSV.
    output_csv: Path of the CSV file to write the comparison to.
  """
  test_to_avg1 = read_from_csv(csv1)
  test_to_avg2 = read_from_csv(csv2)
  output_dict = combine_results(test_to_avg1, test_to_avg2)
  write_to_csv(output_dict, output_csv)
96
97
def main():
  """Parses the command-line flags and writes the comparison CSV.

  Exits with a usage error (status 2) when --csv1, --csv2 or --output_csv is
  not given; otherwise exits with the value returned by parse_and_output().
  """
  option_parser = optparse.OptionParser()
  option_parser.add_option(
      '--csv1', type='string',
      help='The first CSV to parse.')
  option_parser.add_option(
      '--csv2', type='string',
      help='The second CSV to parse.')
  option_parser.add_option(
      '--output_csv', type='string',
      help='The file to write the output CSV to.')
  options, _ = option_parser.parse_args()

  # Report a missing flag as a usage error instead of letting open(None) fail
  # with a traceback further down.
  for flag in ('csv1', 'csv2', 'output_csv'):
    if not getattr(options, flag):
      option_parser.error('--%s is required' % flag)

  sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))
111
112
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
115