1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2019 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
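"""Compare perf reports side by side.

Parses one or more perf report files, splits each one into its '# Events:'
sections, and prints tables of the per-section event counts and of the
per-function counts for the top symbols of every report.
"""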
11
12from __future__ import print_function
13
14__author__ = 'asharif@google.com (Ahmad Sharif)'
15
16import argparse
17import functools
18import re
19import sys
20
21from cros_utils import misc
22from cros_utils import tabulator
23
# Key under which GetPerfDictFromReport stores, for each section, the number
# of functions whose percentage is above 1.
ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
# Key under which GetPerfDictFromReport stores, for each section, the sum of
# all function counts.
TOTAL_EVENTS = 'Total_events_of_this_profile'
26
27
def GetPerfDictFromReport(report_file):
  """Summarise a perf report file as nested dictionaries.

  Args:
    report_file: Path of the perf report; it is handed to PerfReport.

  Returns:
    A dict keyed by section name. Each value maps function names to their
    counts and additionally holds ROWS_TO_SHOW (how many functions have a
    percentage above 1) and TOTAL_EVENTS (the sum of all function counts).
  """
  parsed = PerfReport(report_file)
  result = {}
  for section_name, section in parsed.sections.items():
    entry = result.setdefault(section_name, {})
    entry[ROWS_TO_SHOW] = 0
    entry[TOTAL_EVENTS] = 0
    for func in section.functions:
      entry['%s' % (func.name)] = func.count
      entry[TOTAL_EVENTS] += func.count
      if func.percent > 1:
        entry[ROWS_TO_SHOW] += 1
  return result
43
44
def _SortDictionaryByValue(d):
  """Return the keys of d ordered from the largest value to the smallest.

  Values accepted by misc.IsFloat are compared as floats; any other value is
  compared as it is.

  Args:
    d: Dictionary whose keys should be ordered by their values.

  Returns:
    A list of the keys of d.
  """

  def _SortKey(item):
    value = item[1]
    return float(value) if misc.IsFloat(value) else value

  ascending = sorted(d.items(), key=_SortKey)
  # Reverse the ascending result instead of sorting with reverse=True: the two
  # approaches order keys with equal values differently.
  return [key for key, _ in reversed(ascending)]
57
58
class Tabulator(object):
  """Prints groups of dictionaries as simple text tables."""

  def __init__(self, all_dicts):
    # A sequence of groups; every group is a sequence of dicts and is printed
    # as one table.
    self._all_dicts = all_dicts

  def PrintTable(self):
    """Print one table for every group of dictionaries."""
    for group in self._all_dicts:
      self.PrintTableHelper(group)

  def PrintTableHelper(self, dicts):
    """Transform dicts into a table and print it.

    The table has one row per key found in any of the dicts and one column
    per dict. Rows are ordered by the largest value seen for their key, and a
    dict that lacks a key contributes '0' to that row.

    Args:
      dicts: Sequence of dictionaries to show side by side.
    """
    largest = {}
    for column in dicts:
      for key, value in column.items():
        largest[key] = max(largest[key], value) if key in largest else value

    # Header row: 'name' followed by the index of every column.
    table = [['name'] + list(range(len(dicts)))]
    for key in _SortDictionaryByValue(largest):
      table.append([key] + [column.get(key, '0') for column in dicts])

    print(tabulator.GetSimpleTable(table))
97
98
class Function(object):
  """Plain record holding the name, count and percent of one function."""

  def __init__(self):
    # All fields start out empty/zero.
    self.name = ''
    self.count = self.percent = 0
106
107
class Section(object):
  """One section of a perf report, parsed from its raw text.

  Attributes:
    name: Text following the count on the 'Events:' line, or '' when the
      section has no such line.
    raw_contents: The unparsed text of the section.
    count: The count on the 'Events:' line converted with misc.UnitToNumber,
      or 0 when the section has no such line.
    functions: List of Function objects, one per parsed data row; empty when
      the section has no 'Events:' line.
  """

  def __init__(self, contents):
    self.name = ''
    self.raw_contents = contents
    # Give count and functions defaults up front so that they exist even when
    # no 'Events:' line is found; callers read both unconditionally.
    self.count = 0
    self.functions = []
    self._ParseSection()

  def _ParseSection(self):
    """Fill in name, count and functions from raw_contents.

    Does nothing when raw_contents has no 'Events:' line.

    Raises:
      AssertionError: If more than one 'Events:' line is present.
    """
    matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
    assert len(matches) <= 1, 'More than one event found in 1 section'
    if not matches:
      return
    count_text, self.name = matches[0]
    self.count = misc.UnitToNumber(count_text)

    self.functions = []
    for line in self.raw_contents.splitlines():
      # Data rows contain a percentage and are not '#' comment lines. Blank
      # lines are skipped by the same test since they contain no '%'.
      if '%' not in line or line.startswith('#'):
        continue
      # Row layout: <percent>% <count> <name, possibly containing spaces>.
      fields = [f for f in line.split(' ') if f]
      function = Function()
      function.percent = float(fields[0].strip('%'))
      function.count = int(fields[1])
      function.name = ' '.join(fields[2:])
      self.functions.append(function)
138
139
class PerfReport(object):
  """Parsed representation of a perf report file.

  Attributes:
    perf_file: Path of the report file.
    sections: Dict mapping a section name to its Section object.
    metadata: Dict of the 'key: value' pairs found in the text that precedes
      the first '# Events:' marker.
    event_counts: Dict reset to empty by _ParseSections; nothing in this
      class fills it in.
  """

  def __init__(self, perf_file):
    self.perf_file = perf_file
    self._ReadFile()
    self.sections = {}
    self.metadata = {}
    self._section_contents = []
    self._section_header = ''
    self._SplitSections()
    self._ParseSections()
    self._ParseSectionHeader()

  def _ParseSectionHeader(self):
    """Parse a header of a perf report file."""
    # The "captured on" field is inaccurate - this actually refers to when the
    # report was generated, not when the data was captured.
    for line in self._section_header.splitlines():
      # Drop the first two characters of every line (the '# ' prefix).
      line = line[2:]
      if ':' in line:
        key, val = line.strip().split(':', 1)
        self.metadata[key.strip()] = val.strip()

  def _ReadFile(self):
    """Read the whole report file into memory."""
    # Use a context manager so the file handle is closed right away.
    with open(self.perf_file) as report:
      self._perf_contents = report.read()

  def _ParseSections(self):
    """Build a Section for every chunk produced by _SplitSections."""
    self.event_counts = {}
    self.sections = {}
    for section_content in self._section_contents:
      section = Section(section_content)
      section.name = self._GetHumanReadableName(section.name)
      self.sections[section.name] = section

  # TODO(asharif): Do this better.
  def _GetHumanReadableName(self, section_name):
    """Translate a 'raw <number>' section name using the report header.

    Args:
      section_name: Name taken from the section's 'Events:' line.

    Returns:
      section_name itself when it does not contain 'raw'. Otherwise the sixth
      space-separated field of the first header line that mentions the raw
      number and has that many fields, or section_name when there is no such
      line.
    """
    if 'raw' not in section_name:
      return section_name
    raw_number = section_name.strip().split(' ')[-1]
    for line in self._section_header.splitlines():
      if raw_number not in line:
        continue
      fields = line.strip().split(' ')
      # Skip lines too short to hold a name instead of raising IndexError.
      if len(fields) > 5:
        return fields[5]
    # No header line describes this raw event: keep the original name rather
    # than returning None, which would file the section under the key None.
    return section_name

  def _SplitSections(self):
    """Split the file contents into a header and per-event sections.

    Every section starts at a '# Events:' marker and runs up to the next
    marker or the end of the file. The header is everything before the first
    marker; when there is no marker at all the header is the whole file.
    """
    contents = self._perf_contents
    starts = [m.start() for m in re.finditer('# Events:', contents)]
    ends = starts[1:] + [len(contents)]
    self._section_contents = [
        contents[begin:end] for begin, end in zip(starts, ends)
    ]
    self._section_header = contents[:starts[0]] if starts else contents
197
198
class PerfDiffer(object):
  """Compares the sections and top functions of several perf reports.

  Attributes set by the constructor:
    _reports: Sequence of report objects; each needs a perf_file attribute
      and a sections dict mapping section names to section objects.
    _num_symbols: How many leading functions of every section to consider.
    _common_only: Whether to also emit scaled counts for functions that are
      present in every report.
    _common_function_names: Dict mapping a section name to the set of
      function names shared by all reports that contain that section.
  """

  def __init__(self, reports, num_symbols, common_only):
    self._reports = reports
    self._num_symbols = num_symbols
    self._common_only = common_only
    self._common_function_names = {}

  def DoDiff(self):
    """Build the comparison tables and print them through Tabulator.

    The tables are, in order: the report file names, the event count of every
    section per report, and then one table per section with the counts of its
    top functions per report.
    """
    section_names = self._FindAllSections()

    filename_dicts = [{'file': report.perf_file} for report in self._reports]
    summary_dicts = []
    for report in self._reports:
      summary_dicts.append({
          name: report.sections[name].count
          for name in section_names
          if name in report.sections
      })

    all_dicts = [filename_dicts, summary_dicts]

    for section_name in section_names:
      # Only membership is needed below, so keep the names in a set.
      function_names = set(
          self._GetTopFunctions(section_name, self._num_symbols))
      self._FindCommonFunctions(section_name)
      dicts = []
      for report in self._reports:
        d = {}
        if section_name in report.sections:
          section = report.sections[section_name]

          # The scaling factor is only used in common_only mode, so it is
          # only computed there.
          common_scaling_factor = None
          if self._common_only:
            common_scaling_factor = self._GetCommonScalingFactor(section)

          for function in section.functions:
            if function.name not in function_names:
              continue
            key = '%s %s' % (section.name, function.name)
            d[key] = function.count
            # In common_only mode also report the count scaled by the
            # common factor for functions present in every report.
            if self._common_only and (
                function.name in self._common_function_names[section.name]):
              d[key + ' scaled'] = common_scaling_factor * function.count
        dicts.append(d)

      all_dicts.append(dicts)

    mytabulator = Tabulator(all_dicts)
    mytabulator.PrintTable()

  def _FindAllSections(self):
    """Return all section names, ordered by their largest event count."""
    sections = {}
    for report in self._reports:
      for section in report.sections.values():
        if section.name not in sections:
          sections[section.name] = section.count
        else:
          sections[section.name] = max(sections[section.name], section.count)
    return _SortDictionaryByValue(sections)

  def _GetCommonScalingFactor(self, section):
    """Return 100 divided by the total count of the section's common functions.

    _FindCommonFunctions must have been called for the section's name first.

    Args:
      section: Section whose functions are summed.

    Returns:
      The scaling factor as a float, or 0.0 when the common functions have a
      total count of zero (for example because no function is common).
    """
    common_names = self._common_function_names[section.name]
    common_count = self._GetCount(section, lambda name: name in common_names)
    if not common_count:
      # Avoid dividing by zero when there is nothing to scale.
      return 0.0
    return 100.0 / common_count

  def _GetCount(self, section, filter_fun=None):
    """Sum the counts of the section's functions.

    Args:
      section: Section whose functions are summed.
      filter_fun: Optional predicate on the function name; when given, only
        functions for which it returns a true value are counted.

    Returns:
      The summed count as an int.
    """
    return sum(
        int(function.count)
        for function in section.functions
        if not filter_fun or filter_fun(function.name))

  def _FindCommonFunctions(self, section_name):
    """Record the function names shared by all reports having the section.

    The result is stored in _common_function_names[section_name]. It is an
    empty set when no report contains the section.
    """
    name_sets = [{f.name for f in report.sections[section_name].functions}
                 for report in self._reports
                 if section_name in report.sections]
    common = set.intersection(*name_sets) if name_sets else set()
    self._common_function_names[section_name] = common

  def _GetTopFunctions(self, section_name, num_functions):
    """Collect the first num_functions function names of every report.

    Returns:
      The names ordered by the largest count seen for each of them.
    """
    all_functions = {}
    for report in self._reports:
      if section_name in report.sections:
        section = report.sections[section_name]
        for f in section.functions[:num_functions]:
          if f.name in all_functions:
            all_functions[f.name] = max(all_functions[f.name], f.count)
          else:
            all_functions[f.name] = f.count
    # FIXME(asharif): Don't really need to sort these...
    return _SortDictionaryByValue(all_functions)

  def _GetFunctionsDict(self, section, function_names):
    """Map name to count for the section's functions listed in function_names."""
    return {
        function.name: function.count
        for function in section.functions
        if function.name in function_names
    }
304
305
def Main(argv):
  """Parse the command line, load every perf report and print their diff.

  Args:
    argv: Full argument vector with the program name as its first element,
      as in sys.argv. Every other argument that is not a recognised option is
      treated as the path of a perf report.

  Returns:
    0 once the diff has been printed.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-n',
      '--num_symbols',
      dest='num_symbols',
      type=int,
      default=5,
      help='The number of symbols to show.')
  parser.add_argument(
      '-c',
      '--common_only',
      dest='common_only',
      action='store_true',
      default=False,
      help='Diff common symbols only.')

  # parse_known_args leaves everything it does not recognise in args. Since
  # argv still contains the program name, that is the first leftover and is
  # skipped below.
  options, args = parser.parse_known_args(argv)

  reports = [PerfReport(report_path) for report_path in args[1:]]
  differ = PerfDiffer(reports, options.num_symbols, options.common_only)
  differ.DoDiff()

  return 0
336
337
# When run as a script, pass the full argv to Main and use its return value as
# the exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv))
340