1# Copyright 2017 The PDFium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Classes that draw conclusions out of a comparison and represent them."""
5
6from collections import Counter
7
# Format strings wrapping their single positional argument in ANSI escape
# sequences (bold + foreground color, followed by a reset). FORMAT_NORMAL
# leaves the argument untouched.
FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

# Identifiers for the possible ratings of a test case. They double as the keys
# used in the serialized summary.
RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

# Every known rating, in the order in which totals are reported.
RATINGS = [
    RATING_FAILURE,
    RATING_REGRESSION,
    RATING_IMPROVEMENT,
    RATING_NO_CHANGE,
    RATING_SMALL_CHANGE,
]

# Format string used to highlight a test case line according to its rating.
RATING_TO_COLOR = dict([
    (RATING_FAILURE, FORMAT_MAGENTA),
    (RATING_REGRESSION, FORMAT_RED),
    (RATING_IMPROVEMENT, FORMAT_CYAN),
    (RATING_NO_CHANGE, FORMAT_GREEN),
    (RATING_SMALL_CHANGE, FORMAT_NORMAL),
])
32
33
class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
  "failure" if either or both runs for the case failed.
  "regression" if there is a significant increase in time for the test case.
  "improvement" if there is a significant decrease in time for the test case.
  "no_change" if the time for the test case did not change at all.
  "small_change" if the time for the test case changed but within the threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Number with the tolerance beyond which changes in
          measurements are considered significant.

          The change is considered as a multiplication rather than an addition
          of a fraction of the previous measurement, that is, a
          threshold_significant of 1.0 will flag test cases that became over
          100% slower (> 200% of the previous time measured) or over 100% faster
          (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1 -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1 -> 50% to 200% is not significant
          threshold_significant 4 -> 20% to 500% is not significant

    """
    self.threshold_significant = threshold_significant
    # Force float arithmetic. With Python 2 integer division, an integer
    # threshold such as 1 or 4 would truncate 1 / (1 + threshold) to 0, giving
    # a negative threshold of -1 that no ratio can go below, so improvements
    # would never be reported.
    self.threshold_significant_negative = (
        1.0 / (1.0 + threshold_significant)) - 1.0

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  @staticmethod
  def _CaseNameToText(case_name):
    """Returns |case_name| as text, decoding it as UTF-8 only if it is bytes."""
    # Calling decode() unconditionally breaks on names that are already text:
    # Python 2 implicitly ASCII-encodes them first (failing on non-ASCII
    # characters) and Python 3 text strings have no decode() at all.
    if isinstance(case_name, bytes):
      return case_name.decode('utf-8')
    return case_name

  def ProcessCase(self, case_name, before, after):
    """Feeds a test case results to the ComparisonConclusions.

    The case is rated, added to the summary totals and stored under
    |case_name|, replacing any result previously stored under the same name.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """

    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
    if not before:
      before = None
    if not after:
      after = None

    if not before or not after:
      ratio = None
      rating = RATING_FAILURE
    else:
      # Relative change: 0.2 means 20% slower, -0.4 means 40% faster.
      ratio = (float(after) / before) - 1.0
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Case names that are byte strings are decoded as UTF-8 to be used as keys;
    names that are already text are used unchanged.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        self._CaseNameToText(cr.case_name): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict
183
184
class ComparisonSummary(object):
  """Per-rating tallies of the test cases seen in a comparison."""

  def __init__(self):
    # Maps each rating to the number of case results recorded with it.
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    """Records one more test case under the rating of |case_result|."""
    self.rating_counter.update([case_result.rating])

  def GetTotal(self):
    """Gets how many test cases were processed, regardless of rating."""
    total = 0
    for count in self.rating_counter.values():
      total += count
    return total

  def GetCount(self, rating):
    """Gets how many processed test cases received the given rating."""
    # Indexing a Counter yields 0 for ratings that were never recorded.
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a serializable dict with the total and the count per rating."""
    totals = {'total': self.GetTotal()}
    totals.update((rating, self.GetCount(rating)) for rating in RATINGS)
    return totals
208
209
class CaseResult(object):
  """Holds the outcome of comparing the measurements of one test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Stores the conclusion drawn for a single test case.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of |before|.
      rating: Rating for this test case.
    """
    self.case_name, self.rating = case_name, rating
    self.before, self.after, self.ratio = before, after, ratio

  def GetOutputDict(self):
    """Returns a dict with every stored field except the case name."""
    return dict(
        (field, getattr(self, field))
        for field in ('before', 'after', 'ratio', 'rating'))
237
238
def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Writes a conclusions dict to stdout as a human-readable report.

  The report has a header, one line per test case and a footer with totals.

  Args:
    conclusions_dict: Dict to print. Its 'comparison_by_case' and 'summary'
        entries are read.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dictionary key to sort the cases. When
        None, the cases are sorted by their (name, dict) pairs.
  """
  # Every print below takes a single parenthesized argument, which behaves the
  # same as a Python 2 print statement.
  heavy_rule = '=' * 80

  # Header.
  print(heavy_rule)
  print('{0:>11s} {1:>15s}  {2}'.format('% Change', 'Time after', 'Test case'))
  print('-' * 80)

  # One line per test case.
  cases = conclusions_dict['comparison_by_case'].iteritems()
  if key is None:
    ordered_cases = sorted(cases)
  else:
    ordered_cases = sorted(cases, key=lambda pair: pair[1][key])

  for case_name, case_dict in ordered_cases:
    line_format = RATING_TO_COLOR[case_dict['rating']] if colored \
        else FORMAT_NORMAL

    if case_dict['rating'] == RATING_FAILURE:
      # Failed cases have no ratio to show.
      failure_line = u'{} to measure time for {}'.format(
          line_format.format('Failed'), case_name)
      print(failure_line.encode('utf-8'))
      continue

    change = line_format.format('{:+11.4%}'.format(case_dict['ratio']))
    case_line = u'{0} {1:15,d}  {2}'.format(change, case_dict['after'],
                                            case_name)
    print(case_line.encode('utf-8'))

  # Footer with the totals.
  totals = conclusions_dict['summary']
  print(heavy_rule)
  print('Test cases run: %d' % totals['total'])

  # Each total is green when zero, otherwise it gets the color of its rating.
  footer_rows = (
      ('Failed to measure: %s', RATING_FAILURE, FORMAT_MAGENTA),
      ('Regressions: %s', RATING_REGRESSION, FORMAT_RED),
      ('Improvements: %s', RATING_IMPROVEMENT, FORMAT_CYAN),
  )
  for template, rating, highlight in footer_rows:
    if not colored:
      total_format = FORMAT_NORMAL
    elif totals[rating]:
      total_format = highlight
    else:
      total_format = FORMAT_GREEN
    print(template % total_format.format(totals[rating]))
291