1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Script to compare the performance of two different chromeOS builds.
7
8This script is meant to be used when the performance impact of a change in
9chromeOS needs to be analyzed. It requires that you have already created two
10chromeOS test images (one with the change, and one without), and that you have
11at least one device available on which to run performance tests.
12
13This script is actually a light-weight wrapper around crosperf, a tool for
14automatically imaging one or more chromeOS devices with particular builds,
15running a set of tests on those builds, and then notifying the user of test
16results (along with some statistical analysis of perf keyvals). This wrapper
17script performs the following tasks:
18
191) Creates a crosperf "experiment" file to be consumed by crosperf.
202) Invokes crosperf using the created experiment file. Crosperf produces 2
21outputs: an e-mail that is sent to the user who invoked it; and an output
22folder that is named based on the given --experiment-name, which is created in
23the directory in which this script was run.
243) Parses the results of crosperf and outputs a summary of relevant data. This
25script produces output in a CSV file, as well as in stdout.
26
27Before running this script for the first time, you should set up your system to
28run sudo without prompting for a password (otherwise, crosperf prompts for a
29sudo password). You should only have to do that once per host machine.
30
31Once you're set up with passwordless sudo, you can run the script (preferably
32from an empty directory, since several output files are produced):
33
34> python perf_compare.py --crosperf=CROSPERF_EXE --image-1=IMAGE_1 \
35  --image-2=IMAGE_2 --board-1=BOARD_1 --board-2=BOARD_2 --remote-1=REMOTE_1 \
36  --remote-2=REMOTE_2
37
38You'll need to specify the following inputs: the full path to the crosperf
39executable; the absolute paths to 2 locally-built chromeOS images (which must
40reside in the "typical location" relative to the chroot, as required by
41crosperf); the name of the boards associated with the 2 images (if both images
42have the same board, you can specify that single board with --board=BOARD); and
43the IP addresses of the 2 remote devices on which to run crosperf (if you have
44only a single device available, specify it with --remote=REMOTE). Run with -h to
45see the full set of accepted command-line arguments.
46
47Notes:
48
491) When you run this script, it will delete any previously-created crosperf
50output directories and created CSV files based on the specified
51--experiment-name.  If you don't want to lose any old crosperf/CSV data, either
52move it to another location, or run this script with a different
53--experiment-name.
542) This script will only run the benchmarks and process the perf keys specified
55in the file "perf_benchmarks.json".  Some benchmarks output more perf keys than
56what are specified in perf_benchmarks.json, and these will appear in the
57crosperf outputs, but not in the outputs produced specifically by this script.
58"""
59
60
61import json
62import logging
63import math
64import optparse
65import os
66import re
67import shutil
68import subprocess
69import sys
70
71
72_ITERATIONS = 5
73_IMAGE_1_NAME = 'Image1'
74_IMAGE_2_NAME = 'Image2'
75_DEFAULT_EXPERIMENT_NAME = 'perf_comparison'
76_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
77_BENCHMARK_INFO_FILE_NAME = os.path.join(_ROOT_DIR, 'perf_benchmarks.json')
78_CROSPERF_REPORT_LINE_DELIMITER = '\t'
79_EXPERIMENT_FILE_NAME = 'experiment.txt'
80
81_BENCHMARK_INFO_TEMPLATE = """
82benchmark: {benchmark} {{
83  autotest_name: {autotest_name}
84  autotest_args: --use_emerged {autotest_args}
85  iterations: {iterations}
86}}
87"""
88
89_IMAGE_INFO_TEMPLATE = """
90label: {label} {{
91  chromeos_image: {image}
92  board: {board}
93  remote: {remote}
94}}
95"""
96
97
98def prompt_for_input(prompt_message):
99    """Prompts for user input and returns the inputted text as a string."""
100    return raw_input('%s:> ' % prompt_message)
101
102
103def identify_benchmarks_to_run(benchmark_info, iteration_nums, perf_keys):
104    """Identifies which benchmarks to run, and for how many iterations.
105
106    @param benchmark_info: A list of dictionaries containing information about
107        the complete set of default perf benchmarks to run.
108    @param iteration_nums: See output_benchmarks_info().
109    @param perf_keys: See output_benchmarks_info().
110
111    @return A tuple (X, Y), where X is a list of dictionaries containing
112        information about the set of benchmarks to run, and Y is the set of
113        perf keys requested to be run.
114    """
115    perf_keys_requested = set()
116    benchmarks_to_run = []
117    if not perf_keys:
118        # Run every benchmark for the specified number of iterations.
119        benchmarks_to_run = benchmark_info
120        for benchmark in benchmarks_to_run:
121            benchmark['iterations'] = iteration_nums[0]
122            for perf_key in benchmark['perf_keys']:
123                perf_keys_requested.add(perf_key)
124    else:
125        # Identify which benchmarks to run, and for how many iterations.
126        identified_benchmarks = {}
127        for i, perf_key in enumerate(perf_keys):
128            perf_keys_requested.add(perf_key)
129            benchmarks = [benchmark for benchmark in benchmark_info
130                          if perf_key in benchmark['perf_keys']]
131            if not benchmarks:
132                logging.error('Perf key "%s" isn\'t associated with a known '
133                              'benchmark.', perf_key)
134                sys.exit(1)
135            elif len(benchmarks) > 1:
136                logging.error('Perf key "%s" is associated with more than one '
137                              'benchmark, but should be unique.', perf_key)
138                sys.exit(1)
139            benchmark_to_add = benchmarks[0]
140            benchmark_to_add = identified_benchmarks.setdefault(
141                benchmark_to_add['benchmark'], benchmark_to_add)
142            if len(iteration_nums) == 1:
143                # If only a single iteration number is specified, we assume
144                # that applies to every benchmark.
145                benchmark_to_add['iterations'] = iteration_nums[0]
146            else:
147                # The user must have specified a separate iteration number for
148                # each perf key.  If the benchmark associated with the current
149                # perf key already has an interation number associated with it,
150                # choose the maximum of the two.
151                iter_num = iteration_nums[i]
152                if 'iterations' in benchmark_to_add:
153                    benchmark_to_add['iterations'] = (
154                        iter_num if iter_num > benchmark_to_add['iterations']
155                        else benchmark_to_add['iterations'])
156                else:
157                    benchmark_to_add['iterations'] = iter_num
158        benchmarks_to_run = identified_benchmarks.values()
159
160    return benchmarks_to_run, perf_keys_requested
161
162
163def output_benchmarks_info(f, iteration_nums, perf_keys):
164    """Identifies details of benchmarks to run, and writes that info to a file.
165
166    @param f: A file object that is writeable.
167    @param iteration_nums: A list of one or more integers representing the
168        number of iterations to run for one or more benchmarks.
169    @param perf_keys: A list of one or more string perf keys we need to
170        run, or None if we should use the complete set of default perf keys.
171
172    @return Set of perf keys actually requested to be run in the output file.
173    """
174    benchmark_info = []
175    with open(_BENCHMARK_INFO_FILE_NAME, 'r') as f_bench:
176        benchmark_info = json.load(f_bench)
177
178    benchmarks_to_run, perf_keys_requested = identify_benchmarks_to_run(
179        benchmark_info, iteration_nums, perf_keys)
180
181    for benchmark in benchmarks_to_run:
182        f.write(_BENCHMARK_INFO_TEMPLATE.format(
183                    benchmark=benchmark['benchmark'],
184                    autotest_name=benchmark['autotest_name'],
185                    autotest_args=benchmark.get('autotest_args', ''),
186                    iterations=benchmark['iterations']))
187
188    return perf_keys_requested
189
190
191def output_image_info(f, label, image, board, remote):
192    """Writes information about a given image to an output file.
193
194    @param f: A file object that is writeable.
195    @param label: A string label for the given image.
196    @param image: The string path to the image on disk.
197    @param board: The string board associated with the image.
198    @param remote: The string IP address on which to install the image.
199    """
200    f.write(_IMAGE_INFO_TEMPLATE.format(
201                label=label, image=image, board=board, remote=remote))
202
203
204def invoke_crosperf(crosperf_exe, result_dir, experiment_name, board_1, board_2,
205                    remote_1, remote_2, iteration_nums, perf_keys, image_1,
206                    image_2, image_1_name, image_2_name):
207    """Invokes crosperf with a set of benchmarks and waits for it to complete.
208
209    @param crosperf_exe: The string path to a crosperf executable.
210    @param result_dir: The string name of the directory in which crosperf is
211        expected to write its output.
212    @param experiment_name: A string name to give the crosperf invocation.
213    @param board_1: The string board associated with the first image.
214    @param board_2: The string board associated with the second image.
215    @param remote_1: The string IP address/name of the first remote device.
216    @param remote_2: The string IP address/name of the second remote device.
217    @param iteration_nums: A list of integers representing the number of
218        iterations to run for the different benchmarks.
219    @param perf_keys: A list of perf keys to run, or None to run the full set
220        of default perf benchmarks.
221    @param image_1: The string path to the first image.
222    @param image_2: The string path to the second image.
223    @param image_1_name: A string label to give the first image.
224    @param image_2_name: A string label to give the second image.
225
226    @return A tuple (X, Y), where X is the path to the created crosperf report
227        file, and Y is the set of perf keys actually requested to be run.
228    """
229    # Create experiment file for crosperf.
230    with open(_EXPERIMENT_FILE_NAME, 'w') as f:
231        f.write('name: {name}\n'.format(name=experiment_name))
232        perf_keys_requested = output_benchmarks_info(
233            f, iteration_nums, perf_keys)
234        output_image_info(f, image_1_name, image_1, board_1, remote_1)
235        output_image_info(f, image_2_name, image_2, board_2, remote_2)
236
237    # Invoke crosperf with the experiment file.
238    logging.info('Invoking crosperf with created experiment file...')
239    p = subprocess.Popen([crosperf_exe, _EXPERIMENT_FILE_NAME],
240                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
241
242    # Pass through crosperf output as debug messages until crosperf run is
243    # complete.
244    while True:
245        next_line = p.stdout.readline().strip()
246        if not next_line and p.poll() != None:
247            break
248        logging.debug(next_line)
249        sys.stdout.flush()
250    p.communicate()
251    exit_code = p.returncode
252
253    if exit_code:
254        logging.error('Crosperf returned exit code %s', exit_code)
255        sys.exit(1)
256
257    report_file = os.path.join(result_dir, 'results.html')
258    if not os.path.exists(report_file):
259        logging.error('Crosperf report file missing, cannot proceed.')
260        sys.exit(1)
261
262    logging.info('Crosperf run complete.')
263    logging.info('Crosperf results available in "%s"', result_dir)
264    return report_file, perf_keys_requested
265
266
267def parse_crosperf_report_file(report_file, perf_keys_requested):
268    """Reads in and parses a crosperf report file for relevant perf data.
269
270    @param report_file: See generate_results().
271    @param perf_keys_requested: See generate_results().
272
273    @return A dictionary containing perf information extracted from the crosperf
274        report file.
275    """
276    results = {}
277    with open(report_file, 'r') as f:
278        contents = f.read()
279
280        match = re.search(r'summary-tsv.+?/pre', contents, flags=re.DOTALL)
281        contents = match.group(0)
282
283        curr_benchmark = None
284        for line in contents.splitlines():
285            delimiter = r'\s+?'
286            match = re.search(
287                r'Benchmark:%s(?P<benchmark>\w+?);%sIterations:%s'
288                 '(?P<iterations>\w+?)\s' % (delimiter, delimiter, delimiter),
289                line)
290            if match:
291                curr_benchmark = match.group('benchmark')
292                iterations = match.group('iterations')
293                results[curr_benchmark] = {'iterations': iterations,
294                                           'p_values': []}
295                continue
296            split = line.strip().split(_CROSPERF_REPORT_LINE_DELIMITER)
297            if (len(split) == 12 and split[-2] == '--' and
298                split[0] not in ['retval', 'iterations'] and
299                split[0] in perf_keys_requested):
300                results[curr_benchmark]['p_values'].append(
301                    (split[0], split[-1]))
302
303    return results
304
305
306def generate_results(report_file, result_file, perf_keys_requested):
307    """Output relevant crosperf results to a CSV file, and to stdout.
308
309    This code parses the "results.html" output file of crosperf. It then creates
310    a CSV file that has the following format per line:
311
312    benchmark_name,num_iterations,perf_key,p_value[,perf_key,p_value]
313
314    @param report_file: The string name of the report file created by crosperf.
315    @param result_file: A string name for the CSV file to output.
316    @param perf_keys_requested: The set of perf keys originally requested to be
317        run.
318    """
319    results = parse_crosperf_report_file(report_file, perf_keys_requested)
320
321    # Output p-value data to a CSV file.
322    with open(result_file, 'w') as f:
323        for bench in results:
324            perf_key_substring = ','.join(
325                ['%s,%s' % (x[0], x[1]) for x in results[bench]['p_values']])
326            f.write('%s,%s,%s\n' % (
327                bench, results[bench]['iterations'], perf_key_substring))
328
329    logging.info('P-value results available in "%s"', result_file)
330
331    # Collect and output some additional summary results to stdout.
332    small_p_value = []
333    nan_p_value = []
334    perf_keys_obtained = set()
335    for benchmark in results:
336        p_values = results[benchmark]['p_values']
337        for key, p_val in p_values:
338            perf_keys_obtained.add(key)
339            if float(p_val) <= 0.05:
340                small_p_value.append((benchmark, key, p_val))
341            elif math.isnan(float(p_val)):
342                nan_p_value.append((benchmark, key, p_val))
343
344    if small_p_value:
345        logging.info('The following perf keys showed statistically significant '
346             'result differences (p-value <= 0.05):')
347        for item in small_p_value:
348            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])
349    else:
350        logging.info('No perf keys showed statistically significant result '
351                     'differences (p-value <= 0.05)')
352
353    if nan_p_value:
354        logging.info('The following perf keys had "NaN" p-values:')
355        for item in nan_p_value:
356            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])
357
358    # Check if any perf keys are missing from what was requested, and notify
359    # the user if so.
360    for key_requested in perf_keys_requested:
361        if key_requested not in perf_keys_obtained:
362            logging.warning('Could not find results for requested perf key '
363                            '"%s".', key_requested)
364
365
366def parse_options():
367    """Parses command-line arguments."""
368    parser = optparse.OptionParser()
369
370    parser.add_option('--crosperf', metavar='PATH', type='string', default=None,
371                      help='Absolute path to the crosperf executable '
372                           '(required).')
373    parser.add_option('--image-1', metavar='PATH', type='string', default=None,
374                      help='Absolute path to the first image .bin file '
375                           '(required).')
376    parser.add_option('--image-2', metavar='PATH', type='string', default=None,
377                      help='Absolute path to the second image .bin file '
378                           '(required).')
379
380    board_group = optparse.OptionGroup(
381        parser, 'Specifying the boards (required)')
382    board_group.add_option('--board', metavar='BOARD', type='string',
383                           default=None,
384                           help='Name of the board associated with the images, '
385                                'if both images have the same board. If each '
386                                'image has a different board, use '
387                                'options --board-1 and --board-2 instead.')
388    board_group.add_option('--board-1', metavar='BOARD', type='string',
389                           default=None,
390                           help='Board associated with the first image.')
391    board_group.add_option('--board-2', metavar='BOARD', type='string',
392                           default=None,
393                           help='Board associated with the second image.')
394    parser.add_option_group(board_group)
395
396    remote_group = optparse.OptionGroup(
397        parser, 'Specifying the remote devices (required)')
398    remote_group.add_option('--remote', metavar='IP', type='string',
399                            default=None,
400                            help='IP address/name of remote device to use, if '
401                                 'only one physical device is to be used. If '
402                                 'using two devices, use options --remote-1 '
403                                 'and --remote-2 instead.')
404    remote_group.add_option('--remote-1', metavar='IP', type='string',
405                            default=None,
406                            help='IP address/name of first device to use.')
407    remote_group.add_option('--remote-2', metavar='IP', type='string',
408                            default=None,
409                            help='IP address/name of second device to use.')
410    parser.add_option_group(remote_group)
411
412    optional_group = optparse.OptionGroup(parser, 'Optional settings')
413    optional_group.add_option('--image-1-name', metavar='NAME', type='string',
414                              default=_IMAGE_1_NAME,
415                              help='Descriptive name for the first image. '
416                                   'Defaults to "%default".')
417    optional_group.add_option('--image-2-name', metavar='NAME', type='string',
418                              default=_IMAGE_2_NAME,
419                              help='Descriptive name for the second image. '
420                                    'Defaults to "%default".')
421    optional_group.add_option('--experiment-name', metavar='NAME',
422                              type='string', default=_DEFAULT_EXPERIMENT_NAME,
423                              help='A descriptive name for the performance '
424                                   'comparison experiment to run. Defaults to '
425                                   '"%default".')
426    optional_group.add_option('--perf-keys', metavar='KEY1[,KEY2...]',
427                              type='string', default=None,
428                              help='Comma-separated list of perf keys to '
429                                   'evaluate, if you do not want to run the '
430                                   'complete set. By default, will evaluate '
431                                   'with the complete set of perf keys.')
432    optional_group.add_option('--iterations', metavar='N1[,N2...]',
433                              type='string', default=str(_ITERATIONS),
434                              help='Number of iterations to use to evaluate '
435                                   'each perf key (defaults to %default). If '
436                                   'specifying a custom list of perf keys '
437                                   '(with --perf-keys) and you want to have a '
438                                   'different number of iterations for each '
439                                   'perf key, specify a comma-separated list '
440                                   'of iteration numbers where N1 corresponds '
441                                   'to KEY1, N2 corresponds to KEY2, etc.')
442    optional_group.add_option('-v', '--verbose', action='store_true',
443                              default=False, help='Use verbose logging.')
444    parser.add_option_group(optional_group)
445
446    options, _ = parser.parse_args()
447    return options
448
449
450def verify_command_line_options(options, iteration_nums, perf_keys):
451    """Verifies there are no errors in the specified command-line options.
452
453    @param options: An optparse.Options object.
454    @param iteration_nums: An array of numbers representing the number of
455        iterations to perform to evaluate each perf key.
456    @param perf_keys: A list of strings representing perf keys to evaluate, or
457        None if no particular perf keys are specified.
458
459    @return True, if there were no errors in the command-line options, or
460        False if any error was detected.
461    """
462    success = True
463    if not options.crosperf:
464        logging.error('You must specify the path to a crosperf executable.')
465        success = False
466    if options.crosperf and not os.path.isfile(options.crosperf):
467        logging.error('Could not locate crosperf executable "%s".',
468                      options.crosperf)
469        if options.crosperf.startswith('/google'):
470            logging.error('Did you remember to run prodaccess?')
471        success = False
472    if not options.image_1 or not options.image_2:
473        logging.error('You must specify the paths for 2 image .bin files.')
474        success = False
475    if not options.board and (not options.board_1 or not options.board_2):
476        logging.error('You must specify the board name(s): either a single '
477                      'board with --board, or else two board names with '
478                      '--board-1 and --board-2.')
479        success = False
480    if options.board and options.board_1 and options.board_2:
481        logging.error('Specify either one board with --board, or two boards '
482                      'with --board-1 and --board-2, but not both.')
483        success = False
484    if not options.remote and (not options.remote_1 or not options.remote_2):
485        logging.error('You must specify the remote device(s) to use: either a '
486                      'single device with --remote, or else two devices with '
487                      '--remote-1 and --remote-2.')
488        success = False
489    if options.remote and options.remote_1 and options.remote_2:
490        logging.error('Specify either one remote device with --remote, or two '
491                      'devices with --remote-1 and --remote-2, but not both.')
492        success = False
493    if len(iteration_nums) > 1 and not perf_keys:
494        logging.error('You should only specify multiple iteration numbers '
495                      'if you\'re specifying a custom list of perf keys to '
496                      'evaluate.')
497        success = False
498    if (options.perf_keys and len(iteration_nums) > 1 and
499        len(options.perf_keys.split(',')) > len(iteration_nums)):
500        logging.error('You specified %d custom perf keys, but only %d '
501                      'iteration numbers.', len(options.perf_keys.split(',')),
502                      len(iteration_nums))
503        success = False
504    return success
505
506
507def main():
508    options = parse_options()
509
510    log_level = logging.DEBUG if options.verbose else logging.INFO
511    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
512                        level=log_level)
513
514    iteration_nums = [int(i) for i in options.iterations.split(',')]
515    perf_keys = options.perf_keys.split(',') if options.perf_keys else None
516
517    # Verify there are no errors in the specified command-line options.
518    if not verify_command_line_options(options, iteration_nums, perf_keys):
519        return 1
520
521    # Clean up any old results that will be overwritten.
522    result_dir = options.experiment_name + '_results'
523    if os.path.isdir(result_dir):
524        shutil.rmtree(result_dir)
525    result_file = options.experiment_name + '_results.csv'
526    if os.path.isfile(result_file):
527        os.remove(result_file)
528
529    if options.remote:
530        remote_1, remote_2 = options.remote, options.remote
531    else:
532        remote_1, remote_2 = options.remote_1, options.remote_2
533
534    if options.board:
535        board_1, board_2 = options.board, options.board
536    else:
537        board_1, board_2 = options.board_1, options.board_2
538
539    report_file, perf_keys_requested = invoke_crosperf(
540        options.crosperf, result_dir, options.experiment_name, board_1, board_2,
541        remote_1, remote_2, iteration_nums, perf_keys, options.image_1,
542        options.image_2, options.image_1_name, options.image_2_name)
543    generate_results(report_file, result_file, perf_keys_requested)
544
545    return 0
546
547
548if __name__ == '__main__':
549    sys.exit(main())
550