1#!/usr/bin/python2
2
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.
7
8The pprof --top and pprof --tree outputs should be extracted from the benchmark
9profiles. The outputs contain the hot functions and the call chains.
10
11For each pair of pprof --top and --tree output files, the tool will create a
12file that contains the hot functions present also in the extracted CWP data.
13The common functions are organized in groups that represent a Chrome OS
14component. A function belongs to a group that is defined by a given file path
15if it is declared in a file that shares that path.
16
A set of metrics is computed for each function, benchmark and Chrome OS group
covered by a benchmark.
19
Afterwards, this script extracts the functions that are present in the CWP
data and not in the benchmark profiles. The extra functions are also grouped
in Chrome OS components.
23"""
24
25from collections import defaultdict
26
27import argparse
28import os
29import shutil
30import sys
31
32import benchmark_metrics
33import utils
34
35
class HotFunctionsProcessor(object):
  """Does the pprof and CWP output processing.

  Extracts the common and the extra functions from the pprof files and the CWP
  data and groups them in Chrome OS components. Computes the metrics for the
  common functions, for each benchmark and for the Chrome OS groups covered by
  a benchmark.
  """

  def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file,
               cwp_pairwise_inclusive_count_file, cwp_function_groups_file,
               common_functions_path, common_functions_groups_path,
               benchmark_set_metrics_file, extra_cwp_functions_file,
               extra_cwp_functions_groups_file,
               extra_cwp_functions_groups_path):
    """Initializes the HotFunctionsProcessor.

    The constructor only stores the paths; no file is read or written until
    ProcessHotFunctions is called.

    Args:
      pprof_top_path: The directory containing the files with the pprof --top
        output.
      pprof_tree_path: The directory containing the files with the pprof --tree
        output.
      cwp_inclusive_count_file: The CSV file containing the CWP functions with
        the inclusive count values.
      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
        of parent and child functions with their inclusive count values.
      cwp_function_groups_file: The file that contains the CWP function groups.
      common_functions_path: The directory containing the CSV output files
        with the common functions of the benchmark profiles and CWP data.
      common_functions_groups_path: The directory containing the CSV output
        files with the CWP groups and their metrics that match the common
        functions of the benchmark profiles and CWP.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        each benchmark.
      extra_cwp_functions_file: The CSV output file containing the functions
        that are in the CWP data, but are not in any of the benchmark profiles.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra CWP functions and their statistics.
      extra_cwp_functions_groups_path: The directory containing the CSV output
        files with the extra CWP functions that match a particular group.
    """
    self._pprof_top_path = pprof_top_path
    self._pprof_tree_path = pprof_tree_path
    self._cwp_inclusive_count_file = cwp_inclusive_count_file
    self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file
    self._cwp_function_groups_file = cwp_function_groups_file
    self._common_functions_path = common_functions_path
    self._common_functions_groups_path = common_functions_groups_path
    self._benchmark_set_metrics_file = benchmark_set_metrics_file
    self._extra_cwp_functions_file = extra_cwp_functions_file
    self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file
    self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path

  @staticmethod
  def _WriteSortedLines(output_path, header, keyed_lines):
    """Writes a header followed by the lines, largest sort value first.

    The lines are joined with newlines; no trailing newline is written.

    Args:
      output_path: The path of the file to write.
      header: The first line of the output file.
      keyed_lines: A list of (sort value, line) tuples.
    """
    # Compare the sort values only. The sort is stable, so lines with equal
    # sort values keep the order in which they were collected.
    ordered_lines = sorted(
        keyed_lines, key=lambda keyed_line: keyed_line[0], reverse=True)
    output_lines = [header] + [line for _, line in ordered_lines]

    with open(output_path, 'w') as output_file:
      output_file.write('\n'.join(output_lines))

  def ProcessHotFunctions(self):
    """Does the processing of the hot functions.

    Parses the CWP function groups file, extracts the functions common to the
    benchmark profiles and the CWP data, then writes the extra CWP functions
    and the groups that match them.
    """
    with open(self._cwp_function_groups_file) as input_file:
      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())

    cwp_statistics = self.ExtractCommonFunctions(
        self._pprof_top_path, self._pprof_tree_path,
        self._cwp_inclusive_count_file,
        self._cwp_pairwise_inclusive_count_file, cwp_function_groups,
        self._common_functions_path, self._common_functions_groups_path,
        self._benchmark_set_metrics_file)
    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
    self.GroupExtraFunctions(cwp_statistics, cwp_function_groups,
                             self._extra_cwp_functions_groups_path,
                             self._extra_cwp_functions_groups_file)

  def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path,
                             cwp_inclusive_count_file,
                             cwp_pairwise_inclusive_count_file,
                             cwp_function_groups, common_functions_path,
                             common_functions_groups_path,
                             benchmark_set_metrics_file):
    """Extracts the common functions of the benchmark profiles and the CWP data.

    For each pair of pprof --top and --tree output files, it creates a separate
    file with the same name containing the common functions specifications and
    metrics, that will be placed in the common_functions_path directory.

    The resulting file is in CSV format, containing the following fields:
    function name, file name, object, inclusive count, inclusive_count_fraction,
    flat, flat%, sum%, cum, cum%, distance and score. The entries are sorted in
    descending order by the score.

    For each pair of pprof files, an additional file is created with the
    Chrome OS groups that match the common functions.

    The file is in CSV format containing the fields: group name, group path,
    the number of functions that match the group, the cumulative and average
    distance, the cumulative and average score. The entries are sorted in
    descending order by the cumulative score.
    The file has the same name with the pprof file and it is placed in the
    common_functions_groups_path directory.

    For all the analyzed benchmarks, the method creates a CSV output file
    containing the metrics for each benchmark. The CSV fields include the
    benchmark name, the number of common functions, the cumulative and
    average distance and score. The entries are sorted in descending order by
    the cumulative score.

    It builds a dict of the CWP statistics by calling the
    utils.ParseCWPInclusiveCountFile method and if a function is common, it is
    marked as a COMMON_FUNCTION.

    Args:
      pprof_top_path: The name of the directory with the files with the
        pprof --top output.
      pprof_tree_path: The name of the directory with the files with the
        pprof --tree output. A file with the same name is read from this
        directory for every file found in pprof_top_path.
      cwp_inclusive_count_file: The CSV file containing the CWP functions with
        the inclusive count values.
      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
        of parent and child functions with their inclusive count values.
      cwp_function_groups: A list of tuples containing the name of the group
        and the corresponding file path.
      common_functions_path: The path containing the output files with the
        common functions and their metrics.
      common_functions_groups_path: The path containing the output files with
        the Chrome OS groups that match the common functions and their metrics.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        all the analyzed benchmarks.

    Returns:
      A dict containing the CWP statistics with the common functions marked as
      COMMON_FUNCTION.
    """
    cwp_inclusive_count_statistics = utils.ParseCWPInclusiveCountFile(
        cwp_inclusive_count_file)
    cwp_pairwise_inclusive_count_statistics = (
        utils.ParseCWPPairwiseInclusiveCountFile(
            cwp_pairwise_inclusive_count_file))
    cwp_inclusive_count_statistics_cumulative = (
        utils.ComputeCWPCummulativeInclusiveStatistics(
            cwp_inclusive_count_statistics))
    cwp_pairwise_inclusive_count_fractions = (
        utils.ComputeCWPChildFunctionsFractions(
            cwp_inclusive_count_statistics_cumulative,
            cwp_pairwise_inclusive_count_statistics))
    # Holds one (score_cum, CSV line) tuple per analyzed benchmark.
    benchmark_set_metrics_lines = []

    for pprof_file in os.listdir(pprof_top_path):
      pprof_top_statistics = utils.ParsePprofTopOutput(
          os.path.join(pprof_top_path, pprof_file))
      pprof_tree_statistics = utils.ParsePprofTreeOutput(
          os.path.join(pprof_tree_path, pprof_file))
      # Holds one (score, CSV line) tuple per common function.
      common_functions_lines = []
      benchmark_function_metrics = {}

      for function_key, function_statistic in pprof_top_statistics.items():
        if function_key not in cwp_inclusive_count_statistics:
          continue

        cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = \
            cwp_inclusive_count_statistics[function_key]
        # Mark the function as common so that it is skipped later on, when the
        # extra CWP functions are collected.
        cwp_inclusive_count_statistics[function_key] = (
            cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction,
            utils.COMMON_FUNCTION)

        # The key has the form "<function name>,<file name>".
        function_name, _ = function_key.split(',')
        distance = benchmark_metrics.ComputeDistanceForFunction(
            pprof_tree_statistics[function_key],
            cwp_pairwise_inclusive_count_fractions.get(function_name, {}))
        # The fifth pprof --top field is the one written under the cum% column.
        benchmark_cum_p = float(function_statistic[4])
        score = benchmark_metrics.ComputeScoreForFunction(
            distance, cwp_inclusive_count_fraction, benchmark_cum_p)
        benchmark_function_metrics[function_key] = (distance, score)

        # Keep the score next to the line, so that the sort does not have to
        # parse it back out of the CSV text.
        common_functions_lines.append(
            (float(score),
             ','.join([function_key, cwp_dso_name, str(cwp_inclusive_count),
                       str(cwp_inclusive_count_fraction),
                       ','.join(function_statistic), str(distance),
                       str(score)])))

      benchmark_function_groups_statistics = \
          benchmark_metrics.ComputeMetricsForComponents(
              cwp_function_groups, benchmark_function_metrics)
      benchmark_metrics_fields = list(
          benchmark_metrics.ComputeMetricsForBenchmark(
              benchmark_function_metrics))
      # The fourth metric is the one written under the score_cum column.
      benchmark_set_metrics_lines.append(
          (float(benchmark_metrics_fields[3]),
           ','.join([pprof_file, ','.join(
               [str(metric) for metric in benchmark_metrics_fields])])))

      self._WriteSortedLines(
          os.path.join(common_functions_path, pprof_file),
          'function,file,dso,inclusive_count,inclusive_count_fraction,flat,'
          'flat%,sum%,cum,cum%,distance,score', common_functions_lines)

      common_functions_groups_lines = []
      for group_name, group_statistic in \
          benchmark_function_groups_statistics.items():
        group_fields = list(group_statistic)
        # The fifth group statistic is the one written under the score_cum
        # column.
        common_functions_groups_lines.append(
            (float(group_fields[4]),
             ','.join([group_name, ','.join(
                 [str(field) for field in group_fields])])))

      self._WriteSortedLines(
          os.path.join(common_functions_groups_path, pprof_file),
          'group_name,file_path,number_of_functions,distance_cum,'
          'distance_avg,score_cum,score_avg', common_functions_groups_lines)

    self._WriteSortedLines(
        benchmark_set_metrics_file,
        'benchmark_name,number_of_functions,distance_cum,distance_avg,'
        'score_cum,score_avg', benchmark_set_metrics_lines)

    return cwp_inclusive_count_statistics

  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups,
                          extra_cwp_functions_groups_path,
                          extra_cwp_functions_groups_file):
    """Groups the extra functions.

    Writes the data of the functions that belong to each group in a separate
    file, sorted by their inclusive count value, in descending order. The file
    name is the same as the group name.

    The file is in CSV format, containing the fields: function name, file name,
    object name, inclusive count, inclusive count fraction.

    It creates a CSV file containing the name of the group, their
    common path, the total inclusive count and inclusive count fraction values
    of all the functions declared in files that share the common path, sorted
    in descending order by the inclusive count value.

    A function is counted in at most one group: the first group of
    cwp_function_groups whose path is a substring of the function file name.
    Groups that match no function are not written.

    Args:
      cwp_statistics: A dict containing the CWP statistics. Every function that
        is not marked as COMMON_FUNCTION is considered.
      cwp_function_groups: A list of tuples with the groups names and the path
        describing the groups.
      extra_cwp_functions_groups_path: The name of the directory containing
        the CSV output files with the extra CWP functions that match a
        particular group.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra functions and their statistics.
    """
    # Maps a group name to the list [(inclusive count, CSV line) tuples of the
    # functions, shared path, total inclusive count, total inclusive count
    # fraction].
    groups_statistics = {}

    for function, statistics in cwp_statistics.items():
      if statistics[3] == utils.COMMON_FUNCTION:
        continue

      # The key has the form "<function name>,<file name>".
      file_name = function.split(',')[1]
      inclusive_count = int(statistics[1])
      inclusive_count_fraction = float(statistics[2])

      for group in cwp_function_groups:
        group_common_path = group[1]

        if group_common_path not in file_name:
          continue

        group_statistics = groups_statistics.setdefault(
            group[0], [[], '', 0, 0.0])
        group_statistics[0].append(
            (inclusive_count,
             ','.join([function, statistics[0], str(statistics[1]),
                       str(statistics[2])])))
        group_statistics[1] = group_common_path
        group_statistics[2] += inclusive_count
        group_statistics[3] += inclusive_count_fraction
        # Only the first matching group gets the function.
        break

    extra_cwp_functions_groups_lines = []

    for group_name, group_statistics in groups_statistics.items():
      group_lines, group_common_path, group_inclusive_count, \
          group_inclusive_count_fraction = group_statistics
      self._WriteSortedLines(
          os.path.join(extra_cwp_functions_groups_path, group_name),
          'function,file,dso,inclusive_count,inclusive_count_fraction',
          group_lines)
      extra_cwp_functions_groups_lines.append(
          (group_inclusive_count,
           ','.join([group_name, group_common_path,
                     str(group_inclusive_count),
                     str(group_inclusive_count_fraction)])))

    self._WriteSortedLines(
        extra_cwp_functions_groups_file,
        'group,shared_path,inclusive_count,inclusive_count_fraction',
        extra_cwp_functions_groups_lines)

  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
    """Gets the functions that are in the CWP data, but not in the pprof output.

    Writes the functions and their statistics in the extra_cwp_functions_file
    file. The output is sorted in descending order based on the inclusive_count
    value. The file is in CSV format, containing the fields: function name,
    file name, object name, inclusive count and inclusive count fraction.

    Args:
      cwp_statistics: A dict containing the CWP statistics indexed by the
        function and the file name, comma separated.
      extra_cwp_functions_file: The file where it should be stored the CWP
        functions and statistics that are marked as EXTRA_FUNCTION.
    """
    # Keep the inclusive count next to the line, so that the sort does not have
    # to parse it back out of the CSV text.
    extra_functions_lines = [
        (int(statistics[1]),
         ','.join([function, statistics[0], str(statistics[1]),
                   str(statistics[2])]))
        for function, statistics in cwp_statistics.items()
        if statistics[3] == utils.EXTRA_FUNCTION
    ]

    self._WriteSortedLines(
        extra_cwp_functions_file,
        'function,file,dso,inclusive_count,inclusive_count_fraction',
        extra_functions_lines)
353
354
def ParseArguments(arguments):
  """Parses the command line arguments of the script.

  Every flag is required and takes a single value.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace holding the value of each flag.
  """
  parser = argparse.ArgumentParser()

  def AddRequiredFlag(flag, description):
    """Registers a mandatory flag with the given help text."""
    parser.add_argument(flag, required=True, help=description)

  # Input locations.
  AddRequiredFlag(
      '--pprof_top_path',
      'The directory containing the files with the pprof --top output of '
      'the benchmark profiles (the hot functions). The name of the files '
      'should match with the ones from the pprof tree output files.')
  AddRequiredFlag(
      '--pprof_tree_path',
      'The directory containing the files with the pprof --tree output '
      'of the benchmark profiles (the call chains). The name of the files '
      'should match with the ones of the pprof top output files.')
  AddRequiredFlag(
      '--cwp_inclusive_count_file',
      'The CSV file containing the CWP hot functions with their '
      'inclusive_count values. The CSV fields include the name of the '
      'function, the file and the object with the definition, the inclusive '
      'count value and the inclusive count fraction out of the total amount of '
      'inclusive count values.')
  AddRequiredFlag(
      '--cwp_pairwise_inclusive_count_file',
      'The CSV file containing the CWP pairs of parent and child '
      'functions with their inclusive count values. The CSV fields include the '
      'name of the parent and child functions concatenated by ;;, the file '
      'and the object with the definition of the child function, and the '
      'inclusive count value.')
  AddRequiredFlag(
      '--cwp_function_groups_file',
      'The file that contains the CWP function groups. A line consists in '
      'the group name and a file path describing the group. A group must '
      'represent a ChromeOS component.')

  # Output locations.
  AddRequiredFlag(
      '--common_functions_path',
      'The directory containing the CSV output files with the common '
      'functions of the benchmark profiles and CWP data. A file will contain '
      'all the hot functions from a pprof top output file that are also '
      'included in the file containing the cwp inclusive count values. The CSV '
      'fields are: the function name, the file and the object where the '
      'function is declared, the CWP inclusive count and inclusive count '
      'fraction values, the cumulative and average distance, the cumulative '
      'and average score. The files with the common functions will have the '
      'same names with the corresponding pprof output files.')
  AddRequiredFlag(
      '--common_functions_groups_path',
      'The directory containing the CSV output files with the Chrome OS '
      'groups and their metrics that match the common functions of the '
      'benchmark profiles and CWP. The files with the groups will have the '
      'same names with the corresponding pprof output files. The CSV fields '
      'include the group name, group path, the number of functions that match '
      'the group, the average and cumulative distance, the average and '
      'cumulative score.')
  AddRequiredFlag(
      '--benchmark_set_metrics_file',
      'The CSV output file containing the metrics for each benchmark. The '
      'CSV fields include the benchmark name, the number of common functions, '
      'the average and cumulative distance and score.')
  AddRequiredFlag(
      '--extra_cwp_functions_file',
      'The CSV output file containing the functions that are in the CWP '
      'data, but are not in any of the benchmark profiles. The CSV fields '
      'include the name of the function, the file name and the object with the '
      'definition, and the CWP inclusive count and inclusive count fraction '
      'values. The entries are sorted in descending order based on the '
      'inclusive count value.')
  AddRequiredFlag(
      '--extra_cwp_functions_groups_file',
      'The CSV output file containing the groups that match the extra CWP '
      'functions and their statistics. The CSV fields include the group name, '
      'the file path, the total inclusive count and inclusive count fraction '
      'values of the functions matching a particular group.')
  AddRequiredFlag(
      '--extra_cwp_functions_groups_path',
      'The directory containing the CSV output files with the extra CWP '
      'functions that match a particular group. The name of the file is the '
      'same as the group name. The CSV fields include the name of the '
      'function, the file name and the object with the definition, and the CWP '
      'inclusive count and inclusive count fraction values. The entries are '
      'sorted in descending order based on the inclusive count value.')

  return parser.parse_args(arguments)
449
450
def Main(argv):
  """Parses the arguments and runs the hot functions processing.

  Each of the three output directories is deleted, if it exists, and created
  again before the processing starts.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)

  output_directories = (options.common_functions_path,
                        options.common_functions_groups_path,
                        options.extra_cwp_functions_groups_path)

  for output_directory in output_directories:
    # Start from an empty directory, dropping the results of a previous run.
    if os.path.exists(output_directory):
      shutil.rmtree(output_directory)

    os.makedirs(output_directory)

  HotFunctionsProcessor(
      pprof_top_path=options.pprof_top_path,
      pprof_tree_path=options.pprof_tree_path,
      cwp_inclusive_count_file=options.cwp_inclusive_count_file,
      cwp_pairwise_inclusive_count_file=(
          options.cwp_pairwise_inclusive_count_file),
      cwp_function_groups_file=options.cwp_function_groups_file,
      common_functions_path=options.common_functions_path,
      common_functions_groups_path=options.common_functions_groups_path,
      benchmark_set_metrics_file=options.benchmark_set_metrics_file,
      extra_cwp_functions_file=options.extra_cwp_functions_file,
      extra_cwp_functions_groups_file=options.extra_cwp_functions_groups_file,
      extra_cwp_functions_groups_path=(
          options.extra_cwp_functions_groups_path)).ProcessHotFunctions()
479
480
# Run the processing only when the file is executed as a script. The program
# name is dropped from the arguments handed to Main.
if __name__ == '__main__':
  Main(sys.argv[1:])
483