#!/usr/bin/env python2
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to compare the performance of two different Chrome OS builds.

This script is meant to be used when the performance impact of a change in
Chrome OS needs to be analyzed. It requires that you have already created two
Chrome OS test images (one with the change, and one without), and that you
have at least one device available on which to run performance tests.

This script is actually a lightweight wrapper around crosperf, a tool for
automatically imaging one or more Chrome OS devices with particular builds,
running a set of tests on those builds, and then notifying the user of test
results (along with some statistical analysis of perf keyvals). This wrapper
script performs the following tasks:

1) Creates a crosperf "experiment" file to be consumed by crosperf.
2) Invokes crosperf using the created experiment file. Crosperf produces two
outputs: an e-mail sent to the user who invoked it, and an output folder named
after the given --experiment-name, created in the directory from which this
script was run.
3) Parses the results of crosperf and outputs a summary of relevant data. The
summary is written both to a CSV file and to stdout.

Before running this script for the first time, you should set up your system
to run sudo without prompting for a password (otherwise, crosperf prompts for
a sudo password). You should only have to do that once per host machine.

Once you're set up with passwordless sudo, you can run the script (preferably
from an empty directory, since several output files are produced):

> python perf_compare.py --crosperf=CROSPERF_EXE --image-1=IMAGE_1 \
  --image-2=IMAGE_2 --board-1=BOARD_1 --board-2=BOARD_2 --remote-1=REMOTE_1 \
  --remote-2=REMOTE_2

You'll need to specify the following inputs: the full path to the crosperf
executable; the absolute paths to the two locally-built Chrome OS images
(which must reside in the "typical location" relative to the chroot, as
required by crosperf); the names of the boards associated with the two images
(if both images have the same board, you can specify that single board with
--board=BOARD); and the IP addresses of the two remote devices on which to run
crosperf (if you have only a single device available, specify it with
--remote=REMOTE). Run with -h to see the full set of accepted command-line
arguments.

Notes:

1) When you run this script, it deletes any previously-created crosperf output
directories and CSV files that match the specified --experiment-name. If you
don't want to lose any old crosperf/CSV data, either move it to another
location, or run this script with a different --experiment-name.
2) This script only runs the benchmarks and processes the perf keys specified
in the file "perf_benchmarks.json". Some benchmarks output more perf keys than
those specified in perf_benchmarks.json; the extra keys will appear in the
crosperf outputs, but not in the outputs produced specifically by this script.
"""


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import logging
import math
import optparse
import os
import re
import shutil
import subprocess
import sys
from six.moves import input


_ITERATIONS = 5
_IMAGE_1_NAME = 'Image1'
_IMAGE_2_NAME = 'Image2'
_DEFAULT_EXPERIMENT_NAME = 'perf_comparison'
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
_BENCHMARK_INFO_FILE_NAME = os.path.join(_ROOT_DIR, 'perf_benchmarks.json')
_CROSPERF_REPORT_LINE_DELIMITER = '\t'
_EXPERIMENT_FILE_NAME = 'experiment.txt'

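# The JSON file named above drives which benchmarks can be run. Each entry in
# perf_benchmarks.json is expected to carry the keys this script reads:
# "benchmark", "autotest_name", "perf_keys", and (optionally) "autotest_args".
# A minimal illustrative entry, using made-up benchmark and key names rather
# than values from the real file, might look like:
#
#   {
#     "benchmark": "example_bench",
#     "autotest_name": "platform_ExampleBench",
#     "autotest_args": "--suite=smoke",
#     "perf_keys": ["example_key_ms"]
#   }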
_BENCHMARK_INFO_TEMPLATE = """
benchmark: {benchmark} {{
  autotest_name: {autotest_name}
  autotest_args: --use_emerged {autotest_args}
  iterations: {iterations}
}}
"""

_IMAGE_INFO_TEMPLATE = """
label: {label} {{
  chromeos_image: {image}
  board: {board}
  remote: {remote}
}}
"""


def identify_benchmarks_to_run(benchmark_info, iteration_nums, perf_keys):
    """Identifies which benchmarks to run, and for how many iterations.

    @param benchmark_info: A list of dictionaries containing information about
        the complete set of default perf benchmarks to run.
    @param iteration_nums: See output_benchmarks_info().
    @param perf_keys: See output_benchmarks_info().

    @return A tuple (X, Y), where X is a list of dictionaries containing
        information about the set of benchmarks to run, and Y is the set of
        perf keys requested to be run.
    """
    perf_keys_requested = set()
    benchmarks_to_run = []
    if not perf_keys:
        # Run every benchmark for the specified number of iterations.
        benchmarks_to_run = benchmark_info
        for benchmark in benchmarks_to_run:
            benchmark['iterations'] = iteration_nums[0]
            for perf_key in benchmark['perf_keys']:
                perf_keys_requested.add(perf_key)
    else:
        # Identify which benchmarks to run, and for how many iterations.
        identified_benchmarks = {}
        for i, perf_key in enumerate(perf_keys):
            perf_keys_requested.add(perf_key)
            benchmarks = [benchmark for benchmark in benchmark_info
                          if perf_key in benchmark['perf_keys']]
            if not benchmarks:
                logging.error('Perf key "%s" isn\'t associated with a known '
                              'benchmark.', perf_key)
                sys.exit(1)
            elif len(benchmarks) > 1:
                logging.error('Perf key "%s" is associated with more than one '
                              'benchmark, but should be unique.', perf_key)
                sys.exit(1)
            benchmark_to_add = benchmarks[0]
            benchmark_to_add = identified_benchmarks.setdefault(
                benchmark_to_add['benchmark'], benchmark_to_add)
            if len(iteration_nums) == 1:
                # If only a single iteration number is specified, we assume
                # it applies to every benchmark.
                benchmark_to_add['iterations'] = iteration_nums[0]
            else:
                # The user must have specified a separate iteration number for
                # each perf key.  If the benchmark associated with the current
                # perf key already has an iteration number associated with it,
                # choose the maximum of the two.
                iter_num = iteration_nums[i]
                if 'iterations' in benchmark_to_add:
                    benchmark_to_add['iterations'] = max(
                        iter_num, benchmark_to_add['iterations'])
                else:
                    benchmark_to_add['iterations'] = iter_num
        benchmarks_to_run = list(identified_benchmarks.values())

    return benchmarks_to_run, perf_keys_requested
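
# Illustrative behavior of identify_benchmarks_to_run(), using hypothetical
# names: with perf_keys=['key_a', 'key_b'] and iteration_nums=[3, 7], where
# both keys belong to the same benchmark entry, that benchmark appears once in
# the returned list with 'iterations' set to max(3, 7) == 7, and the returned
# set of requested keys is {'key_a', 'key_b'}.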


def output_benchmarks_info(f, iteration_nums, perf_keys):
    """Identifies details of benchmarks to run, and writes that info to a file.

    @param f: A file object that is writeable.
    @param iteration_nums: A list of one or more integers representing the
        number of iterations to run for one or more benchmarks.
    @param perf_keys: A list of one or more string perf keys we need to
        run, or None if we should use the complete set of default perf keys.

    @return Set of perf keys actually requested to be run in the output file.
    """
    benchmark_info = []
    with open(_BENCHMARK_INFO_FILE_NAME, 'r') as f_bench:
        benchmark_info = json.load(f_bench)

    benchmarks_to_run, perf_keys_requested = identify_benchmarks_to_run(
        benchmark_info, iteration_nums, perf_keys)

    for benchmark in benchmarks_to_run:
        f.write(_BENCHMARK_INFO_TEMPLATE.format(
                    benchmark=benchmark['benchmark'],
                    autotest_name=benchmark['autotest_name'],
                    autotest_args=benchmark.get('autotest_args', ''),
                    iterations=benchmark['iterations']))

    return perf_keys_requested


def output_image_info(f, label, image, board, remote):
    """Writes information about a given image to an output file.

    @param f: A file object that is writeable.
    @param label: A string label for the given image.
    @param image: The string path to the image on disk.
    @param board: The string board associated with the image.
    @param remote: The string IP address on which to install the image.
    """
    f.write(_IMAGE_INFO_TEMPLATE.format(
                label=label, image=image, board=board, remote=remote))


def invoke_crosperf(crosperf_exe, result_dir, experiment_name, board_1, board_2,
                    remote_1, remote_2, iteration_nums, perf_keys, image_1,
                    image_2, image_1_name, image_2_name):
    """Invokes crosperf with a set of benchmarks and waits for it to complete.

    @param crosperf_exe: The string path to a crosperf executable.
    @param result_dir: The string name of the directory in which crosperf is
        expected to write its output.
    @param experiment_name: A string name to give the crosperf invocation.
    @param board_1: The string board associated with the first image.
    @param board_2: The string board associated with the second image.
    @param remote_1: The string IP address/name of the first remote device.
    @param remote_2: The string IP address/name of the second remote device.
    @param iteration_nums: A list of integers representing the number of
        iterations to run for the different benchmarks.
    @param perf_keys: A list of perf keys to run, or None to run the full set
        of default perf benchmarks.
    @param image_1: The string path to the first image.
    @param image_2: The string path to the second image.
    @param image_1_name: A string label to give the first image.
    @param image_2_name: A string label to give the second image.

    @return A tuple (X, Y), where X is the path to the created crosperf report
        file, and Y is the set of perf keys actually requested to be run.
    """
    # Create experiment file for crosperf.
    with open(_EXPERIMENT_FILE_NAME, 'w') as f:
        f.write('name: {name}\n'.format(name=experiment_name))
        perf_keys_requested = output_benchmarks_info(
            f, iteration_nums, perf_keys)
        output_image_info(f, image_1_name, image_1, board_1, remote_1)
        output_image_info(f, image_2_name, image_2, board_2, remote_2)

    # Invoke crosperf with the experiment file.
    logging.info('Invoking crosperf with created experiment file...')
    p = subprocess.Popen([crosperf_exe, _EXPERIMENT_FILE_NAME],
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Pass through crosperf output as debug messages until the crosperf run is
    # complete.
    while True:
        next_line = p.stdout.readline().strip()
        if not next_line and p.poll() is not None:
            break
        logging.debug(next_line)
        sys.stdout.flush()
    p.communicate()
    exit_code = p.returncode

    if exit_code:
        logging.error('Crosperf returned exit code %s', exit_code)
        sys.exit(1)

    report_file = os.path.join(result_dir, 'results.html')
    if not os.path.exists(report_file):
        logging.error('Crosperf report file missing, cannot proceed.')
        sys.exit(1)

    logging.info('Crosperf run complete.')
    logging.info('Crosperf results available in "%s"', result_dir)
    return report_file, perf_keys_requested


def parse_crosperf_report_file(report_file, perf_keys_requested):
    """Reads in and parses a crosperf report file for relevant perf data.

    @param report_file: See generate_results().
    @param perf_keys_requested: See generate_results().

    @return A dictionary containing perf information extracted from the
        crosperf report file.
    """
    results = {}
    with open(report_file, 'r') as f:
        contents = f.read()

        match = re.search(r'summary-tsv.+?/pre', contents, flags=re.DOTALL)
        contents = match.group(0)

        curr_benchmark = None
        for line in contents.splitlines():
            delimiter = r'\s+?'
            match = re.search(
                r'Benchmark:%s(?P<benchmark>\w+?);%sIterations:%s'
                r'(?P<iterations>\w+?)\s' % (delimiter, delimiter, delimiter),
                line)
            if match:
                curr_benchmark = match.group('benchmark')
                iterations = match.group('iterations')
                results[curr_benchmark] = {'iterations': iterations,
                                           'p_values': []}
                continue
            split = line.strip().split(_CROSPERF_REPORT_LINE_DELIMITER)
            if (len(split) == 12 and split[-2] == '--' and
                split[0] not in ['retval', 'iterations'] and
                split[0] in perf_keys_requested):
                results[curr_benchmark]['p_values'].append(
                    (split[0], split[-1]))

    return results
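
# For orientation, parse_crosperf_report_file() expects the "summary-tsv"
# region of results.html to contain blocks roughly like the made-up excerpt
# below: a "Benchmark: ...; Iterations: ..." header line, followed by
# tab-separated rows whose second-to-last column is "--" and whose last column
# is the p-value (the "..." stands in for the remaining columns):
#
#   Benchmark: example_bench;  Iterations: 5
#   example_key_ms <TAB> 10.1 <TAB> 9.8 <TAB> ... <TAB> -- <TAB> 0.032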


def generate_results(report_file, result_file, perf_keys_requested):
    """Outputs relevant crosperf results to a CSV file and to stdout.

    This code parses the "results.html" output file of crosperf. It then
    creates a CSV file that has the following format per line:

    benchmark_name,num_iterations,perf_key,p_value[,perf_key,p_value]

    @param report_file: The string name of the report file created by crosperf.
    @param result_file: A string name for the CSV file to output.
    @param perf_keys_requested: The set of perf keys originally requested to be
        run.
    """
    results = parse_crosperf_report_file(report_file, perf_keys_requested)

    # Output p-value data to a CSV file.
    with open(result_file, 'w') as f:
        for bench in results:
            perf_key_substring = ','.join(
                ['%s,%s' % (x[0], x[1]) for x in results[bench]['p_values']])
            f.write('%s,%s,%s\n' % (
                bench, results[bench]['iterations'], perf_key_substring))
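
    # An illustrative (made-up) line of the CSV written above, for a benchmark
    # run for 5 iterations with two perf keys:
    #
    #   example_bench,5,example_key_ms,0.032,other_key_ms,nan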

    logging.info('P-value results available in "%s"', result_file)

    # Collect and output some additional summary results to stdout.
    small_p_value = []
    nan_p_value = []
    perf_keys_obtained = set()
    for benchmark in results:
        p_values = results[benchmark]['p_values']
        for key, p_val in p_values:
            perf_keys_obtained.add(key)
            if float(p_val) <= 0.05:
                small_p_value.append((benchmark, key, p_val))
            elif math.isnan(float(p_val)):
                nan_p_value.append((benchmark, key, p_val))

    if small_p_value:
        logging.info('The following perf keys showed statistically '
                     'significant result differences (p-value <= 0.05):')
        for item in small_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])
    else:
        logging.info('No perf keys showed statistically significant result '
                     'differences (p-value <= 0.05)')

    if nan_p_value:
        logging.info('The following perf keys had "NaN" p-values:')
        for item in nan_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])

    # Check if any perf keys are missing from what was requested, and notify
    # the user if so.
    for key_requested in perf_keys_requested:
        if key_requested not in perf_keys_obtained:
            logging.warning('Could not find results for requested perf key '
                            '"%s".', key_requested)


def parse_options():
    """Parses command-line arguments."""
    parser = optparse.OptionParser()

    parser.add_option('--crosperf', metavar='PATH', type='string', default=None,
                      help='Absolute path to the crosperf executable '
                           '(required).')
    parser.add_option('--image-1', metavar='PATH', type='string', default=None,
                      help='Absolute path to the first image .bin file '
                           '(required).')
    parser.add_option('--image-2', metavar='PATH', type='string', default=None,
                      help='Absolute path to the second image .bin file '
                           '(required).')

    board_group = optparse.OptionGroup(
        parser, 'Specifying the boards (required)')
    board_group.add_option('--board', metavar='BOARD', type='string',
                           default=None,
                           help='Name of the board associated with the images, '
                                'if both images have the same board. If each '
                                'image has a different board, use '
                                'options --board-1 and --board-2 instead.')
    board_group.add_option('--board-1', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the first image.')
    board_group.add_option('--board-2', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the second image.')
    parser.add_option_group(board_group)

    remote_group = optparse.OptionGroup(
        parser, 'Specifying the remote devices (required)')
    remote_group.add_option('--remote', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of remote device to use, if '
                                 'only one physical device is to be used. If '
                                 'using two devices, use options --remote-1 '
                                 'and --remote-2 instead.')
    remote_group.add_option('--remote-1', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of first device to use.')
    remote_group.add_option('--remote-2', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of second device to use.')
    parser.add_option_group(remote_group)

    optional_group = optparse.OptionGroup(parser, 'Optional settings')
    optional_group.add_option('--image-1-name', metavar='NAME', type='string',
                              default=_IMAGE_1_NAME,
                              help='Descriptive name for the first image. '
                                   'Defaults to "%default".')
    optional_group.add_option('--image-2-name', metavar='NAME', type='string',
                              default=_IMAGE_2_NAME,
                              help='Descriptive name for the second image. '
                                   'Defaults to "%default".')
    optional_group.add_option('--experiment-name', metavar='NAME',
                              type='string', default=_DEFAULT_EXPERIMENT_NAME,
                              help='A descriptive name for the performance '
                                   'comparison experiment to run. Defaults to '
                                   '"%default".')
    optional_group.add_option('--perf-keys', metavar='KEY1[,KEY2...]',
                              type='string', default=None,
                              help='Comma-separated list of perf keys to '
                                   'evaluate, if you do not want to run the '
                                   'complete set. By default, will evaluate '
                                   'with the complete set of perf keys.')
    optional_group.add_option('--iterations', metavar='N1[,N2...]',
                              type='string', default=str(_ITERATIONS),
                              help='Number of iterations to use to evaluate '
                                   'each perf key (defaults to %default). If '
                                   'specifying a custom list of perf keys '
                                   '(with --perf-keys) and you want to have a '
                                   'different number of iterations for each '
                                   'perf key, specify a comma-separated list '
                                   'of iteration numbers where N1 corresponds '
                                   'to KEY1, N2 corresponds to KEY2, etc.')
    optional_group.add_option('-v', '--verbose', action='store_true',
                              default=False, help='Use verbose logging.')
    parser.add_option_group(optional_group)

    options, _ = parser.parse_args()
    return options
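
# A hypothetical invocation exercising the optional flags above (the paths,
# board name, and device address are placeholders, not real values):
#
#   python perf_compare.py --crosperf=/path/to/crosperf \
#       --image-1=/path/to/old_image.bin --image-2=/path/to/new_image.bin \
#       --board=example-board --remote=192.168.0.2 \
#       --perf-keys=example_key_ms,other_key_ms --iterations=3,7 -v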


def verify_command_line_options(options, iteration_nums, perf_keys):
    """Verifies there are no errors in the specified command-line options.

    @param options: An optparse.Values object.
    @param iteration_nums: A list of numbers representing the number of
        iterations to perform to evaluate each perf key.
    @param perf_keys: A list of strings representing perf keys to evaluate, or
        None if no particular perf keys are specified.

    @return True, if there were no errors in the command-line options, or
        False if any error was detected.
    """
    success = True
    if not options.crosperf:
        logging.error('You must specify the path to a crosperf executable.')
        success = False
    if options.crosperf and not os.path.isfile(options.crosperf):
        logging.error('Could not locate crosperf executable "%s".',
                      options.crosperf)
        if options.crosperf.startswith('/google'):
            logging.error('Did you remember to run prodaccess?')
        success = False
    if not options.image_1 or not options.image_2:
        logging.error('You must specify the paths for 2 image .bin files.')
        success = False
    if not options.board and (not options.board_1 or not options.board_2):
        logging.error('You must specify the board name(s): either a single '
                      'board with --board, or else two board names with '
                      '--board-1 and --board-2.')
        success = False
    if options.board and options.board_1 and options.board_2:
        logging.error('Specify either one board with --board, or two boards '
                      'with --board-1 and --board-2, but not both.')
        success = False
    if not options.remote and (not options.remote_1 or not options.remote_2):
        logging.error('You must specify the remote device(s) to use: either a '
                      'single device with --remote, or else two devices with '
                      '--remote-1 and --remote-2.')
        success = False
    if options.remote and options.remote_1 and options.remote_2:
        logging.error('Specify either one remote device with --remote, or two '
                      'devices with --remote-1 and --remote-2, but not both.')
        success = False
    if len(iteration_nums) > 1 and not perf_keys:
        logging.error('You should only specify multiple iteration numbers '
                      'if you\'re specifying a custom list of perf keys to '
                      'evaluate.')
        success = False
    if (options.perf_keys and len(iteration_nums) > 1 and
        len(options.perf_keys.split(',')) > len(iteration_nums)):
        logging.error('You specified %d custom perf keys, but only %d '
                      'iteration numbers.', len(options.perf_keys.split(',')),
                      len(iteration_nums))
        success = False
    return success


def main():
    """Main script logic."""
    options = parse_options()

    log_level = logging.DEBUG if options.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=log_level)

    iteration_nums = [int(i) for i in options.iterations.split(',')]
    perf_keys = options.perf_keys.split(',') if options.perf_keys else None

    # Verify there are no errors in the specified command-line options.
    if not verify_command_line_options(options, iteration_nums, perf_keys):
        return 1

    # Clean up any old results that will be overwritten.
    result_dir = options.experiment_name + '_results'
    if os.path.isdir(result_dir):
        shutil.rmtree(result_dir)
    result_file = options.experiment_name + '_results.csv'
    if os.path.isfile(result_file):
        os.remove(result_file)

    if options.remote:
        remote_1, remote_2 = options.remote, options.remote
    else:
        remote_1, remote_2 = options.remote_1, options.remote_2

    if options.board:
        board_1, board_2 = options.board, options.board
    else:
        board_1, board_2 = options.board_1, options.board_2

    report_file, perf_keys_requested = invoke_crosperf(
        options.crosperf, result_dir, options.experiment_name, board_1, board_2,
        remote_1, remote_2, iteration_nums, perf_keys, options.image_1,
        options.image_2, options.image_1_name, options.image_2_name)
    generate_results(report_file, result_file, perf_keys_requested)

    return 0


if __name__ == '__main__':
    sys.exit(main())