#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Tests for results_stats."""

import os
import sys

import unittest

try:
  import numpy as np
except ImportError:
  np = None

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                '..')))
from statistical_analysis import results_stats


class StatisticalBenchmarkResultsAnalysisTest(unittest.TestCase):
  """Unit testing of several functions in results_stats."""

  def testGetChartsFromBenchmarkResultJson(self):
    """Unit test for errors raised when getting the charts element.

    Also makes sure that the 'trace' element is deleted if it exists and that
    only 'list_of_scalar_values' entries containing values are kept.
    """
    input_json_wrong_format = {'charts_wrong': {}}
    input_json_empty = {'charts': {}}
    with self.assertRaises(ValueError):
      (results_stats.GetChartsFromBenchmarkResultJson(input_json_wrong_format))
    with self.assertRaises(ValueError):
      (results_stats.GetChartsFromBenchmarkResultJson(input_json_empty))

    input_json_with_trace = {'charts':
                             {'trace': {},
                              'Ex_metric_1':
                              {'Ex_page_1': {'type': 'list_of_scalar_values',
                                             'values': [1, 2]},
                               'Ex_page_2': {'type': 'histogram',
                                             'values': [1, 2]}},
                              'Ex_metric_2':
                              {'Ex_page_1': {'type': 'list_of_scalar_values'},
                               'Ex_page_2': {'type': 'list_of_scalar_values',
                                             'values': [1, 2]}}}}

    output = (results_stats.
              GetChartsFromBenchmarkResultJson(input_json_with_trace))
    expected_output = {'Ex_metric_1':
                       {'Ex_page_1': {'type': 'list_of_scalar_values',
                                      'values': [1, 2]}},
                       'Ex_metric_2':
                       {'Ex_page_2': {'type': 'list_of_scalar_values',
                                      'values': [1, 2]}}}
    self.assertEqual(output, expected_output)

  def testCreateBenchmarkResultDict(self):
    """Unit test for benchmark result dict created from a benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark and then
    compares the output dict against an expected predefined output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': metric_vals,
                                       'type': 'list_of_scalar_values'}}

    output = results_stats.CreateBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': [55, 72, 60],
                       'open_tabs_time': [54, 42, 65],
                       'window_display_time': [44, 89]}

    self.assertEqual(output, expected_output)

  def testCreatePagesetBenchmarkResultDict(self):
    """Unit test for pageset benchmark result dict created from benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark when it
    includes a pageset and then compares the output dict against an expected
    predefined output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]
    page_names = ['Ex_page_1', 'Ex_page_2']

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': [0, 1, 2, 3],
                                       'type': 'list_of_scalar_values'}}
      for page in page_names:
        input_json['charts'][metric][page] = {'values': metric_vals,
                                              'type': 'list_of_scalar_values'}

    output = results_stats.CreatePagesetBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': {'Ex_page_1': [55, 72, 60],
                                                   'Ex_page_2': [55, 72, 60]},
                       'open_tabs_time': {'Ex_page_1': [54, 42, 65],
                                          'Ex_page_2': [54, 42, 65]},
                       'window_display_time': {'Ex_page_1': [44, 89],
                                               'Ex_page_2': [44, 89]}}

    self.assertEqual(output, expected_output)

  def testCombinePValues(self):
    """Unit test for Fisher's Method that combines multiple p-values."""
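    # Fisher's method maps k independent p-values to the test statistic
    # X^2 = -2 * sum(ln(p_i)), which follows a chi-squared distribution with
    # 2k degrees of freedom under the null hypothesis. Assuming scipy is
    # available, the pre-computed expected_output below can be reproduced
    # with scipy.stats.combine_pvalues(test_p_values, method='fisher')[1].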
    test_p_values = [0.05, 0.04, 0.10, 0.07, 0.01]

    expected_output = 0.00047334256271885721
    output = results_stats.CombinePValues(test_p_values)

    self.assertEqual(output, expected_output)

  def CreateRandomNormalDistribution(self, mean=0, size=30):
    """Creates a seeded pseudo-random normal sample used by multiple tests."""
    if not np:
      raise ImportError('This function requires Numpy.')

    np.random.seed(0)
    sample = np.random.normal(loc=mean, scale=1, size=size)

    return sample

  def testIsNormallyDistributed(self):
    """Unit test for values returned when testing for normality."""
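    # IsNormallyDistributed is expected to return an (is_normal, p_value)
    # tuple per sample; the expected_outputs below were pre-computed for the
    # seeded samples. A Shapiro-Wilk test (e.g. scipy.stats.shapiro) is a
    # common choice for this kind of check, but the exact test used is an
    # implementation detail of results_stats.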
    if not np:
      self.skipTest('Numpy is not installed.')

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]

    expected_outputs = [(True, 0.5253966450691223),
                        (True, 0.5253913402557373)]
    for sample, expected_output in zip(test_samples, expected_outputs):
      output = results_stats.IsNormallyDistributed(sample)

      self.assertEqual(output, expected_output)

  def testAreSamplesDifferent(self):
    """Unit test for values returned after running the statistical tests.

    Checks that the expected exceptions are raised for undersized or
    non-normal samples, then creates two pseudo-random normally distributed
    samples, runs the statistical tests and compares the resulting answer and
    p-value against their pre-calculated values.
    """
    test_samples = [3 * [0, 0, 2, 4, 4], 3 * [5, 5, 7, 9, 9]]
    with self.assertRaises(results_stats.SampleSizeError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.MANN)
    with self.assertRaises(results_stats.NonNormalSampleError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.WELCH)

    test_samples_equal = (20 * [1], 20 * [1])
    expected_output_equal = (False, 1.0)
    output_equal = results_stats.AreSamplesDifferent(test_samples_equal[0],
                                                     test_samples_equal[1],
                                                     test=results_stats.MANN)
    self.assertEqual(output_equal, expected_output_equal)

    if not np:
      self.skipTest('Numpy is not installed.')

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]
    test_options = results_stats.ALL_TEST_OPTIONS
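    # Expected outcomes in the order of ALL_TEST_OPTIONS: Mann-Whitney U,
    # Kolmogorov-Smirnov and Welch's t-test (matching the Mann/Kolmogorov/
    # Welch labels in testArePagesetBenchmarkResultsDifferent below). The
    # doubled Mann-Whitney value presumably converts a one-sided p-value into
    # the two-sided value that AreSamplesDifferent is expected to return.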
    expected_outputs = [(True, 2 * 0.00068516628052438266),
                        (True, 0.0017459498829507842),
                        (True, 0.00084765230478226514)]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreSamplesDifferent(test_samples[0],
                                                 test_samples[1],
                                                 test=test)
      self.assertEqual(output, expected_output)

  def testAssertThatKeysMatch(self):
    """Unit test for exception raised when input dicts' metrics don't match."""
    differing_input_dicts = [{'messageloop_start_time': [55, 72, 60],
                              'display_time': [44, 89]},
                             {'messageloop_start_time': [55, 72, 60]}]
    with self.assertRaises(results_stats.DictMismatchError):
      results_stats.AssertThatKeysMatch(differing_input_dicts[0],
                                        differing_input_dicts[1])

  def testAreBenchmarkResultsDifferent(self):
    """Unit test for statistical test outcome dict."""
    test_input_dicts = [{'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(0)},
                        {'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(1)}]
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = [{'open_tabs_time': (False, 2 * 0.49704973080841425),
                         'display_time': (True, 2 * 0.00068516628052438266)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.0017459498829507842)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.00084765230478226514)}]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreBenchmarkResultsDifferent(test_input_dicts[0],
                                                          test_input_dicts[1],
                                                          test=test)
      self.assertEqual(output, expected_output)

  def testArePagesetBenchmarkResultsDifferent(self):
    """Unit test for statistical test outcome dict."""
    distributions = (self.CreateRandomNormalDistribution(0),
                     self.CreateRandomNormalDistribution(1))
    test_input_dicts = ({'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[0]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[1]}},
                        {'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[1]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[0]}})
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = ({'open_tabs_time':  # Mann.
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)},
                         'display_time':
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)}},
                        {'open_tabs_time':  # Kolmogorov.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)}},
                        {'open_tabs_time':  # Welch.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)}})

    for test, expected_output in zip(test_options, expected_outputs):
      output = (results_stats.
                ArePagesetBenchmarkResultsDifferent(test_input_dicts[0],
                                                    test_input_dicts[1],
                                                    test=test))
      self.assertEqual(output, expected_output)


if __name__ == '__main__':
  sys.exit(unittest.main())