# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter

FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE, RATING_REGRESSION, RATING_IMPROVEMENT, RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}


def _ToText(value):
  """Returns |value| as text, decoding it as UTF-8 only if it is bytes."""
  if isinstance(value, bytes):
    return value.decode('utf-8')
  return value


def _EncodeForPrint(text):
  """Returns |text| in the form print should receive on the running Python.

  On Python 2 (where str is bytes) the text is encoded as UTF-8 bytes, which
  is what this module has always printed. On Python 3 the text is returned
  unchanged, since printing bytes there would show a b'...' repr instead.
  """
  if str is bytes:
    return text.encode('utf-8')
  return text


def _NoneFirst(value):
  """Returns a sort key that orders None before any other value.

  Failed cases carry None for "before", "after" or "ratio". Python 2 sorted
  None before everything; Python 3 refuses to compare None with numbers, so
  the ordering is made explicit here.
  """
  return (value is not None, value)


class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
  "failure" if either or both runs for the case failed.
  "regression" if there is a significant increase in time for the test case.
  "improvement" if there is a significant decrease in time for the test case.
  "no_change" if the time for the test case did not change at all.
  "small_change" if the time for the test case changed but within the
      threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Float with the tolerance beyond which changes in
          measurements are considered significant.

          The change is considered as a multiplication rather than an addition
          of a fraction of the previous measurement, that is, a
          threshold_significant of 1.0 will flag test cases that became over
          100% slower (> 200% of the previous time measured) or over 100%
          faster (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1 -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1 -> 50% to 200% is not significant
          threshold_significant 4 -> 20% to 500% is not significant
    """
    self.threshold_significant = threshold_significant
    # Float literals force true division: with an int threshold, Python 2
    # would otherwise floor 1 / (1 + threshold) to 0 and the lower bound would
    # become -1, so no case could ever be rated as an improvement.
    self.threshold_significant_negative = (
        1.0 / (1.0 + threshold_significant)) - 1.0

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  def ProcessCase(self, case_name, before, after):
    """Feeds a test case results to the ComparisonConclusions.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """
    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
    if not before:
      before = None
    if not after:
      after = None

    if before is None or after is None:
      ratio = None
      rating = RATING_FAILURE
    else:
      ratio = (float(after) / before) - 1.0
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Case names given as bytes are decoded as UTF-8; text names are used as-is.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        _ToText(cr.case_name): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict


class ComparisonSummary(object):
  """Totals computed for a comparison."""

  def __init__(self):
    """Initializes a summary with no cases counted."""
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    """Counts one more case under the rating of |case_result|."""
    self.rating_counter[case_result.rating] += 1

  def GetTotal(self):
    """Gets the number of test cases processed."""
    return sum(self.rating_counter.values())

  def GetCount(self, rating):
    """Gets the number of test cases processed with a given rating."""
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a dict that can be serialized with all the totals."""
    result = {'total': self.GetTotal()}
    for rating in RATINGS:
      result[rating] = self.GetCount(rating)
    return result


class CaseResult(object):
  """The conclusion for the comparison of a single test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of |before|.
      rating: Rating for this test case.
    """
    self.case_name = case_name
    self.before = before
    self.after = after
    self.ratio = ratio
    self.rating = rating

  def GetOutputDict(self):
    """Returns a dict with the test case's conclusions."""
    return {
        'before': self.before,
        'after': self.after,
        'ratio': self.ratio,
        'rating': self.rating
    }


def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Prints a conclusions dict in a human-readable way.

  Args:
    conclusions_dict: Dict to print.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dictionary key to sort the cases. Cases
        whose value for that key is None are listed first. When key is None
        the cases are sorted by name.
  """
  # Every print below takes a single parenthesized argument so that the same
  # statement works both as a Python 2 print statement and a Python 3 call.

  # Print header
  print('=' * 80)
  print('{0:>11s} {1:>15s} {2}'.format('% Change', 'Time after', 'Test case'))
  print('-' * 80)

  color = FORMAT_NORMAL

  # Print cases
  cases = conclusions_dict['comparison_by_case'].items()
  if key is not None:
    case_pairs = sorted(cases, key=lambda kv: _NoneFirst(kv[1][key]))
  else:
    case_pairs = sorted(cases)

  for case_name, case_dict in case_pairs:
    if colored:
      color = RATING_TO_COLOR[case_dict['rating']]

    if case_dict['rating'] == RATING_FAILURE:
      print(
          _EncodeForPrint(u'{} to measure time for {}'.format(
              color.format('Failed'), case_name)))
      continue

    print(
        _EncodeForPrint(u'{0} {1:15,d} {2}'.format(
            color.format('{:+11.4%}'.format(case_dict['ratio'])),
            case_dict['after'], case_name)))

  # Print totals
  totals = conclusions_dict['summary']
  print('=' * 80)
  print('Test cases run: %d' % totals['total'])

  total_rows = (
      ('Failed to measure: %s', RATING_FAILURE, FORMAT_MAGENTA),
      ('Regressions: %s', RATING_REGRESSION, FORMAT_RED),
      ('Improvements: %s', RATING_IMPROVEMENT, FORMAT_CYAN),
  )
  for template, rating, highlight in total_rows:
    if colored:
      # A non-zero count gets the rating's own color; zero is shown in green.
      color = highlight if totals[rating] else FORMAT_GREEN
    print(template % color.format(totals[rating]))