#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Tests for results_stats."""

import os
import sys

import unittest

try:
  import numpy as np
except ImportError:
  np = None

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                '..')))
from statistical_analysis import results_stats


class StatisticalBenchmarkResultsAnalysisTest(unittest.TestCase):
  """Unit testing of several functions in results_stats."""

  def testGetChartsFromBenchmarkResultJson(self):
    """Unit test for errors raised when getting the charts element.

    Also makes sure that the 'trace' element is deleted if it exists.
    """
    input_json_wrong_format = {'charts_wrong': {}}
    input_json_empty = {'charts': {}}
    with self.assertRaises(ValueError):
      (results_stats.GetChartsFromBenchmarkResultJson(input_json_wrong_format))
    with self.assertRaises(ValueError):
      (results_stats.GetChartsFromBenchmarkResultJson(input_json_empty))

    input_json_with_trace = {'charts':
                             {'trace': {},
                              'Ex_metric_1':
                              {'Ex_page_1': {'type': 'list_of_scalar_values',
                                              'values': [1, 2]},
                               'Ex_page_2': {'type': 'histogram',
                                              'values': [1, 2]}},
                              'Ex_metric_2':
                              {'Ex_page_1': {'type': 'list_of_scalar_values'},
                               'Ex_page_2': {'type': 'list_of_scalar_values',
                                              'values': [1, 2]}}}}

    output = (results_stats.
              GetChartsFromBenchmarkResultJson(input_json_with_trace))
    expected_output = {'Ex_metric_1':
                       {'Ex_page_1': {'type': 'list_of_scalar_values',
                                       'values': [1, 2]}},
                       'Ex_metric_2':
                       {'Ex_page_2': {'type': 'list_of_scalar_values',
                                       'values': [1, 2]}}}
    self.assertEqual(output, expected_output)

  def testCreateBenchmarkResultDict(self):
    """Unit test for benchmark result dict created from a benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark and then
    compares the output dict against a predefined expected output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': metric_vals,
                                       'type': 'list_of_scalar_values'}}

    output = results_stats.CreateBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': [55, 72, 60],
                       'open_tabs_time': [54, 42, 65],
                       'window_display_time': [44, 89]}

    self.assertEqual(output, expected_output)

  def testCreatePagesetBenchmarkResultDict(self):
    """Unit test for pageset benchmark result dict created from benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark when it
    includes a pageset and then compares the output dict against a predefined
    expected output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]
    page_names = ['Ex_page_1', 'Ex_page_2']

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': [0, 1, 2, 3],
                                       'type': 'list_of_scalar_values'}}
      for page in page_names:
        input_json['charts'][metric][page] = {'values': metric_vals,
                                              'type': 'list_of_scalar_values'}

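    # Only the per-page values, not the 'summary' values, should appear in the
    # output dict.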
    output = results_stats.CreatePagesetBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': {'Ex_page_1': [55, 72, 60],
                                                  'Ex_page_2': [55, 72, 60]},
                       'open_tabs_time': {'Ex_page_1': [54, 42, 65],
                                          'Ex_page_2': [54, 42, 65]},
                       'window_display_time': {'Ex_page_1': [44, 89],
                                               'Ex_page_2': [44, 89]}}

    self.assertEqual(output, expected_output)

  def testCombinePValues(self):
    """Unit test for Fisher's Method that combines multiple p-values."""
    test_p_values = [0.05, 0.04, 0.10, 0.07, 0.01]

    expected_output = 0.00047334256271885721
    output = results_stats.CombinePValues(test_p_values)

    self.assertEqual(output, expected_output)

  def CreateRandomNormalDistribution(self, mean=0, size=30):
    """Creates a pseudo-random normal sample for use in multiple tests."""
    if not np:
      raise ImportError('This function requires NumPy.')

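    # Seeding the generator with a constant makes the sample deterministic, so
    # dependent tests can compare against hard-coded expected values.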
    np.random.seed(0)
    sample = np.random.normal(loc=mean, scale=1, size=size)

    return sample

  def testIsNormallyDistributed(self):
    """Unit test for values returned when testing for normality."""
    if not np:
      self.skipTest('NumPy is not installed.')

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]

    expected_outputs = [(True, 0.5253966450691223),
                        (True, 0.5253913402557373)]
    for sample, expected_output in zip(test_samples, expected_outputs):
      output = results_stats.IsNormallyDistributed(sample)

      self.assertEqual(output, expected_output)

  def testAreSamplesDifferent(self):
    """Unit test for values returned after running the statistical tests.

    Checks that unsuitable samples raise the expected errors, then creates two
    pseudo-random normally distributed samples, runs the statistical tests and
    compares the resulting answer and p-value against pre-calculated values.
    """
    test_samples = [3 * [0, 0, 2, 4, 4], 3 * [5, 5, 7, 9, 9]]
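    # These small, heavily tied samples are expected to be rejected: a
    # SampleSizeError for the Mann-Whitney U test and a NonNormalSampleError
    # for Welch's t-test.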
    with self.assertRaises(results_stats.SampleSizeError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.MANN)
    with self.assertRaises(results_stats.NonNormalSampleError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.WELCH)

    test_samples_equal = (20 * [1], 20 * [1])
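    # Identical samples should never be reported as different; the expected
    # p-value is 1.0.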
    expected_output_equal = (False, 1.0)
    output_equal = results_stats.AreSamplesDifferent(test_samples_equal[0],
                                                     test_samples_equal[1],
                                                     test=results_stats.MANN)
    self.assertEqual(output_equal, expected_output_equal)

    if not np:
      self.skipTest('NumPy is not installed.')

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]
    test_options = results_stats.ALL_TEST_OPTIONS

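    # One expected (is_different, p_value) pair per test, in ALL_TEST_OPTIONS
    # order: Mann-Whitney U, Kolmogorov-Smirnov and Welch's t-test.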
    expected_outputs = [(True, 2 * 0.00068516628052438266),
                        (True, 0.0017459498829507842),
                        (True, 0.00084765230478226514)]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreSamplesDifferent(test_samples[0],
                                                 test_samples[1],
                                                 test=test)
      self.assertEqual(output, expected_output)

  def testAssertThatKeysMatch(self):
    """Unit test for exception raised when input dicts' metrics don't match."""
    differing_input_dicts = [{'messageloop_start_time': [55, 72, 60],
                              'display_time': [44, 89]},
                             {'messageloop_start_time': [55, 72, 60]}]
    with self.assertRaises(results_stats.DictMismatchError):
      results_stats.AssertThatKeysMatch(differing_input_dicts[0],
                                        differing_input_dicts[1])

  def testAreBenchmarkResultsDifferent(self):
    """Unit test for statistical test outcome dict."""
    if not np:
      self.skipTest('NumPy is not installed.')

    test_input_dicts = [{'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(0)},
                        {'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(1)}]
    test_options = results_stats.ALL_TEST_OPTIONS

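    # One expected outcome dict per test in ALL_TEST_OPTIONS order: the
    # identical 'open_tabs_time' samples should not be flagged as different,
    # while the shifted 'display_time' samples should be.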
    expected_outputs = [{'open_tabs_time': (False, 2 * 0.49704973080841425),
                         'display_time': (True, 2 * 0.00068516628052438266)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.0017459498829507842)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.00084765230478226514)}]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreBenchmarkResultsDifferent(test_input_dicts[0],
                                                          test_input_dicts[1],
                                                          test=test)
      self.assertEqual(output, expected_output)

  def testArePagesetBenchmarkResultsDifferent(self):
    """Unit test for statistical test outcome dict."""
    if not np:
      self.skipTest('NumPy is not installed.')

    distributions = (self.CreateRandomNormalDistribution(0),
                     self.CreateRandomNormalDistribution(1))
    test_input_dicts = ({'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[0]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[1]}},
                        {'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[1]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[0]}})
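    # For every metric, 'Ex_page_1' compares a distribution against itself,
    # while 'Ex_page_2' compares the two different distributions.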
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = ({'open_tabs_time':  # Mann.
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)},
                         'display_time':
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)}},
                        {'open_tabs_time':  # Kolmogorov.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)}},
                        {'open_tabs_time':  # Welch.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)}})

    for test, expected_output in zip(test_options, expected_outputs):
      output = (results_stats.
                ArePagesetBenchmarkResultsDifferent(test_input_dicts[0],
                                                    test_input_dicts[1],
                                                    test=test))
      self.assertEqual(output, expected_output)


if __name__ == '__main__':
  sys.exit(unittest.main())