#!/usr/bin/python2

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.

The pprof --top and pprof --tree outputs should be extracted from the benchmark
profiles. The outputs contain the hot functions and the call chains.

For each pair of pprof --top and --tree output files, the tool will create a
file that contains the hot functions present also in the extracted CWP data.
The common functions are organized in groups that represent a Chrome OS
component. A function belongs to a group that is defined by a given file path
if it is declared in a file that shares that path.

A set of metrics are computed for each function, benchmark and Chrome OS group
covered by a benchmark.

Afterwards, this script extracts the functions that are present in the CWP
data and not in the benchmark profiles. The extra functions are also grouped
in Chrome OS components.
"""

from collections import defaultdict

import argparse
import os
import shutil
import sys

import benchmark_metrics
import utils


class HotFunctionsProcessor(object):
  """Does the pprof and CWP output processing.

  Extracts the common, extra functions from the pprof files, groups them in
  Chrome OS components. Computes the metrics for the common functions,
  benchmark and Chrome OS groups covered by a benchmark.
  """

  def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file,
               cwp_pairwise_inclusive_count_file, cwp_function_groups_file,
               common_functions_path, common_functions_groups_path,
               benchmark_set_metrics_file, extra_cwp_functions_file,
               extra_cwp_functions_groups_file,
               extra_cwp_functions_groups_path):
    """Initializes the HotFunctionsProcessor.

    Args:
      pprof_top_path: The directory containing the files with the pprof --top
        output.
      pprof_tree_path: The directory containing the files with the pprof --tree
        output.
      cwp_inclusive_count_file: The CSV file containing the CWP functions with
        the inclusive count values.
      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
        of parent and child functions with their inclusive count values.
      cwp_function_groups_file: The file that contains the CWP function groups.
      common_functions_path: The directory containing the CSV output files
        with the common functions of the benchmark profiles and CWP data.
      common_functions_groups_path: The directory containing the CSV output
        files with the CWP groups and their metrics that match the common
        functions of the benchmark profiles and CWP.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        each benchmark.
      extra_cwp_functions_file: The CSV output file containing the functions
        that are in the CWP data, but are not in any of the benchmark profiles.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra CWP functions and their statistics.
      extra_cwp_functions_groups_path: The directory containing the CSV output
        files with the extra CWP functions that match a particular group.
    """
    self._pprof_top_path = pprof_top_path
    self._pprof_tree_path = pprof_tree_path
    self._cwp_inclusive_count_file = cwp_inclusive_count_file
    self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file
    self._cwp_function_groups_file = cwp_function_groups_file
    self._common_functions_path = common_functions_path
    self._common_functions_groups_path = common_functions_groups_path
    self._benchmark_set_metrics_file = benchmark_set_metrics_file
    self._extra_cwp_functions_file = extra_cwp_functions_file
    self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file
    self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path

  def ProcessHotFunctions(self):
    """Does the processing of the hot functions.

    Parses the function groups file, extracts the common functions (which also
    yields the CWP statistics with the common functions marked), then writes
    the extra functions and their grouping.
    """
    with open(self._cwp_function_groups_file) as input_file:
      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())
    cwp_statistics = \
        self.ExtractCommonFunctions(self._pprof_top_path,
                                    self._pprof_tree_path,
                                    self._cwp_inclusive_count_file,
                                    self._cwp_pairwise_inclusive_count_file,
                                    cwp_function_groups,
                                    self._common_functions_path,
                                    self._common_functions_groups_path,
                                    self._benchmark_set_metrics_file)
    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
    self.GroupExtraFunctions(cwp_statistics, cwp_function_groups,
                             self._extra_cwp_functions_groups_path,
                             self._extra_cwp_functions_groups_file)

  def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path,
                             cwp_inclusive_count_file,
                             cwp_pairwise_inclusive_count_file,
                             cwp_function_groups, common_functions_path,
                             common_functions_groups_path,
                             benchmark_set_metrics_file):
    """Extracts the common functions of the benchmark profiles and the CWP data.

    For each pair of pprof --top and --tree output files, it creates a separate
    file with the same name containing the common functions specifications and
    metrics, that will be placed in the common_functions_path directory.

    The resulting file is in CSV format, containing the following fields:
    function name, file name, object, inclusive count, inclusive_count_fraction,
    flat, flat%, sum%, cum, cum%, distance and score.

    For each pair of pprof files, an additional file is created with the
    Chrome OS groups that match the common functions.

    The file is in CSV format containing the fields: group name, group path,
    the number of functions that match the group, the average and cumulative
    distance, the average and cumulative score.
    The file has the same name with the pprof file and it is placed in the
    common_functions_groups_path directory.

    For all the analyzed benchmarks, the method creates a CSV output file
    containing the metrics for each benchmark. The CSV fields include the
    benchmark name, the number of common functions, the average and
    cumulative distance and score.

    It builds a dict of the CWP statistics by calling the
    utils.ParseCWPInclusiveCountFile method and if a function is common, it is
    marked as a COMMON_FUNCTION.

    Args:
      pprof_top_path: The name of the directory with the files with the
        pprof --top output.
      pprof_tree_path: The name of the directory with the files with the
        pprof --tree output. A file with the same name as each file found in
        pprof_top_path is read from this directory.
      cwp_inclusive_count_file: The CSV file containing the CWP functions with
        the inclusive count values. It is passed to
        utils.ParseCWPInclusiveCountFile.
      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
        of parent and child functions with their inclusive count values. It is
        passed to utils.ParseCWPPairwiseInclusiveCountFile.
      cwp_function_groups: A list of tuples containing the name of the group
        and the corresponding file path.
      common_functions_path: The path containing the output files with the
        common functions and their metrics.
      common_functions_groups_path: The path containing the output files with
        the Chrome OS groups that match the common functions and their metrics.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        all the analyzed benchmarks.

    Returns:
      A dict containing the CWP statistics with the common functions marked as
      COMMON_FUNCTION.
    """
    cwp_inclusive_count_statistics = \
        utils.ParseCWPInclusiveCountFile(cwp_inclusive_count_file)
    cwp_pairwise_inclusive_count_statistics = \
        utils.ParseCWPPairwiseInclusiveCountFile(
            cwp_pairwise_inclusive_count_file)
    cwp_inclusive_count_statistics_cumulative = \
        utils.ComputeCWPCummulativeInclusiveStatistics(
            cwp_inclusive_count_statistics)
    cwp_pairwise_inclusive_count_fractions = \
        utils.ComputeCWPChildFunctionsFractions(
            cwp_inclusive_count_statistics_cumulative,
            cwp_pairwise_inclusive_count_statistics)
    benchmark_set_metrics = {}
    pprof_files = os.listdir(pprof_top_path)

    for pprof_file in pprof_files:
      pprof_top_statistics = \
          utils.ParsePprofTopOutput(os.path.join(pprof_top_path, pprof_file))
      pprof_tree_statistics = \
          utils.ParsePprofTreeOutput(os.path.join(pprof_tree_path, pprof_file))
      common_functions_lines = []
      benchmark_function_metrics = {}

      # items() instead of iteritems(): identical results here and it also
      # works on Python 3.
      for function_key, function_statistic in pprof_top_statistics.items():
        if function_key not in cwp_inclusive_count_statistics:
          continue

        # Re-store the CWP entry with its last field set to COMMON_FUNCTION.
        cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = \
            cwp_inclusive_count_statistics[function_key]
        cwp_inclusive_count_statistics[function_key] = \
            (cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction,
             utils.COMMON_FUNCTION)

        # The key is 'function,file'; only the function name indexes the
        # pairwise fractions.
        function_name, _ = function_key.split(',')
        distance = benchmark_metrics.ComputeDistanceForFunction(
            pprof_tree_statistics[function_key],
            cwp_pairwise_inclusive_count_fractions.get(function_name, {}))
        # Index 4 of the pprof --top statistics is the cum% column (see the
        # CSV header written below).
        benchmark_cum_p = float(function_statistic[4])
        score = benchmark_metrics.ComputeScoreForFunction(
            distance, cwp_inclusive_count_fraction, benchmark_cum_p)
        benchmark_function_metrics[function_key] = (distance, score)

        common_functions_lines.append(','.join([
            function_key, cwp_dso_name, str(cwp_inclusive_count),
            str(cwp_inclusive_count_fraction), ','.join(function_statistic),
            str(distance), str(score)
        ]))
      benchmark_function_groups_statistics = \
          benchmark_metrics.ComputeMetricsForComponents(
              cwp_function_groups, benchmark_function_metrics)
      benchmark_set_metrics[pprof_file] = \
          benchmark_metrics.ComputeMetricsForBenchmark(
              benchmark_function_metrics)

      with open(os.path.join(common_functions_path, pprof_file), 'w') \
          as output_file:
        # Column 11 is the score; highest score first.
        common_functions_lines.sort(
            key=lambda x: float(x.split(',')[11]), reverse=True)
        common_functions_lines.insert(0, 'function,file,dso,inclusive_count,'
                                      'inclusive_count_fraction,flat,flat%,'
                                      'sum%,cum,cum%,distance,score')
        output_file.write('\n'.join(common_functions_lines))

      with open(os.path.join(common_functions_groups_path, pprof_file), 'w') \
          as output_file:
        common_functions_groups_lines = \
            [','.join([group_name, ','.join(
                [str(statistic) for statistic in group_statistic])])
             for group_name, group_statistic in
             benchmark_function_groups_statistics.items()]
        # Column 5 is score_cum; highest cumulative score first.
        common_functions_groups_lines.sort(
            key=lambda x: float(x.split(',')[5]), reverse=True)
        common_functions_groups_lines.insert(
            0, 'group_name,file_path,number_of_functions,distance_cum,'
            'distance_avg,score_cum,score_avg')
        output_file.write('\n'.join(common_functions_groups_lines))

    with open(benchmark_set_metrics_file, 'w') as output_file:
      benchmark_set_metrics_lines = []

      for benchmark_name, metrics in benchmark_set_metrics.items():
        benchmark_set_metrics_lines.append(','.join([benchmark_name, ','.join(
            [str(metric) for metric in metrics])]))
      # Column 4 is score_cum; highest cumulative score first.
      benchmark_set_metrics_lines.sort(
          key=lambda x: float(x.split(',')[4]), reverse=True)
      benchmark_set_metrics_lines.insert(
          0, 'benchmark_name,number_of_functions,distance_cum,distance_avg,'
          'score_cum,score_avg')
      output_file.write('\n'.join(benchmark_set_metrics_lines))

    return cwp_inclusive_count_statistics

  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups,
                          extra_cwp_functions_groups_path,
                          extra_cwp_functions_groups_file):
    """Groups the extra functions.

    Writes the data of the functions that belong to each group in a separate
    file, sorted by their inclusive count value, in descending order. The file
    name is the same as the group name.

    The file is in CSV format, containing the fields: function name, file name,
    object name, inclusive count, inclusive count fraction.

    It creates a CSV file containing the name of the group, their
    common path, the total inclusive count and inclusive count fraction values
    of all the functions declared in files that share the common path, sorted
    in descending order by the inclusive count value.

    A function is considered here when it is not marked as COMMON_FUNCTION. It
    is assigned to the first group of cwp_function_groups whose path is a
    substring of the function's file name; functions that match no group are
    not written to any output.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      cwp_function_groups: A list of tuples with the groups names and the path
        describing the groups.
      extra_cwp_functions_groups_path: The name of the directory containing
        the CSV output files with the extra CWP functions that match a
        particular group.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra functions and their statistics.
    """
    # Per group: (output lines, common path, inclusive count sum,
    # inclusive count fraction sum).
    cwp_function_groups_statistics = defaultdict(lambda: ([], '', 0, 0.0))
    for function, statistics in cwp_statistics.items():
      if statistics[3] == utils.COMMON_FUNCTION:
        continue

      # The key is 'function,file'.
      file_name = function.split(',')[1]
      group_inclusive_count = int(statistics[1])
      group_inclusive_count_fraction = float(statistics[2])

      for group in cwp_function_groups:
        group_common_path = group[1]

        if group_common_path not in file_name:
          continue

        group_name = group[0]
        group_statistics = cwp_function_groups_statistics[group_name]
        group_lines = group_statistics[0]
        # Add the totals accumulated so far to this function's own values.
        group_inclusive_count += group_statistics[2]
        group_inclusive_count_fraction += group_statistics[3]

        group_lines.append(','.join([function, statistics[0],
                                     str(statistics[1]), str(statistics[2])]))
        cwp_function_groups_statistics[group_name] = \
            (group_lines, group_common_path, group_inclusive_count,
             group_inclusive_count_fraction)
        # Only the first matching group is credited with the function.
        break

    extra_cwp_functions_groups_lines = []
    for group_name, group_statistics \
        in cwp_function_groups_statistics.items():
      group_output_lines = group_statistics[0]
      # Column 3 is the inclusive count; highest first.
      group_output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
      group_output_lines.insert(
          0, 'function,file,dso,inclusive_count,inclusive_count_fraction')
      with open(os.path.join(extra_cwp_functions_groups_path, group_name),
                'w') as output_file:
        output_file.write('\n'.join(group_output_lines))
      extra_cwp_functions_groups_lines.append(','.join(
          [group_name, group_statistics[1], str(group_statistics[2]), str(
              group_statistics[3])]))

    # Column 2 is the group's total inclusive count; highest first.
    extra_cwp_functions_groups_lines.sort(
        key=lambda x: int(x.split(',')[2]), reverse=True)
    extra_cwp_functions_groups_lines.insert(
        0, 'group,shared_path,inclusive_count,inclusive_count_fraction')
    with open(extra_cwp_functions_groups_file, 'w') as output_file:
      output_file.write('\n'.join(extra_cwp_functions_groups_lines))

  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
    """Gets the functions that are in the CWP data, but not in the pprof output.

    Writes the functions and their statistics in the extra_cwp_functions_file
    file. The output is sorted based on the inclusive_count value. The file is
    in CSV format, containing the fields: function name, file name, object name,
    inclusive count and inclusive count fraction.

    Args:
      cwp_statistics: A dict containing the CWP statistics indexed by the
        function and the file name, comma separated.
      extra_cwp_functions_file: The file where it should be stored the CWP
        functions and statistics that are marked as EXTRA_FUNCTION.
    """
    output_lines = []

    for function, statistics in cwp_statistics.items():
      if statistics[3] == utils.EXTRA_FUNCTION:
        output_lines.append(','.join([function, statistics[0],
                                      str(statistics[1]), str(statistics[2])]))

    with open(extra_cwp_functions_file, 'w') as output_file:
      # Column 3 is the inclusive count; highest first.
      output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
      output_lines.insert(0, 'function,file,dso,inclusive_count,'
                          'inclusive_count_fraction')
      output_file.write('\n'.join(output_lines))


def ParseArguments(arguments):
  """Parses the command line arguments of the tool.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace holding the value of every option. All the options
    are required.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--pprof_top_path',
      required=True,
      help='The directory containing the files with the pprof --top output of '
      'the benchmark profiles (the hot functions). The name of the files '
      'should match with the ones from the pprof tree output files.')
  parser.add_argument(
      '--pprof_tree_path',
      required=True,
      help='The directory containing the files with the pprof --tree output '
      'of the benchmark profiles (the call chains). The name of the files '
      'should match with the ones of the pprof top output files.')
  parser.add_argument(
      '--cwp_inclusive_count_file',
      required=True,
      help='The CSV file containing the CWP hot functions with their '
      'inclusive_count values. The CSV fields include the name of the '
      'function, the file and the object with the definition, the inclusive '
      'count value and the inclusive count fraction out of the total amount of '
      'inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_count_file',
      required=True,
      help='The CSV file containing the CWP pairs of parent and child '
      'functions with their inclusive count values. The CSV fields include the '
      'name of the parent and child functions concatenated by ;;, the file '
      'and the object with the definition of the child function, and the '
      'inclusive count value.')
  parser.add_argument(
      '--cwp_function_groups_file',
      required=True,
      help='The file that contains the CWP function groups. A line consists in '
      'the group name and a file path describing the group. A group must '
      'represent a ChromeOS component.')
  parser.add_argument(
      '--common_functions_path',
      required=True,
      help='The directory containing the CSV output files with the common '
      'functions of the benchmark profiles and CWP data. A file will contain '
      'all the hot functions from a pprof top output file that are also '
      'included in the file containing the cwp inclusive count values. The CSV '
      'fields are: the function name, the file and the object where the '
      'function is declared, the CWP inclusive count and inclusive count '
      'fraction values, the cumulative and average distance, the cumulative '
      'and average score. The files with the common functions will have the '
      'same names with the corresponding pprof output files.')
  parser.add_argument(
      '--common_functions_groups_path',
      required=True,
      help='The directory containing the CSV output files with the Chrome OS '
      'groups and their metrics that match the common functions of the '
      'benchmark profiles and CWP. The files with the groups will have the '
      'same names with the corresponding pprof output files. The CSV fields '
      'include the group name, group path, the number of functions that match '
      'the group, the average and cumulative distance, the average and '
      'cumulative score.')
  parser.add_argument(
      '--benchmark_set_metrics_file',
      required=True,
      help='The CSV output file containing the metrics for each benchmark. The '
      'CSV fields include the benchmark name, the number of common functions, '
      'the average and cumulative distance and score.')
  parser.add_argument(
      '--extra_cwp_functions_file',
      required=True,
      help='The CSV output file containing the functions that are in the CWP '
      'data, but are not in any of the benchmark profiles. The CSV fields '
      'include the name of the function, the file name and the object with the '
      'definition, and the CWP inclusive count and inclusive count fraction '
      'values. The entries are sorted in descending order based on the '
      'inclusive count value.')
  parser.add_argument(
      '--extra_cwp_functions_groups_file',
      required=True,
      help='The CSV output file containing the groups that match the extra CWP '
      'functions and their statistics. The CSV fields include the group name, '
      'the file path, the total inclusive count and inclusive count fraction '
      'values of the functions matching a particular group.')
  parser.add_argument(
      '--extra_cwp_functions_groups_path',
      required=True,
      help='The directory containing the CSV output files with the extra CWP '
      'functions that match a particular group. The name of the file is the '
      'same as the group name. The CSV fields include the name of the '
      'function, the file name and the object with the definition, and the CWP '
      'inclusive count and inclusive count fraction values. The entries are '
      'sorted in descending order based on the inclusive count value.')

  options = parser.parse_args(arguments)

  return options


def _RecreateDirectory(path):
  """Removes the directory tree at path, if it exists, and creates it empty.

  Args:
    path: The directory to be recreated.
  """
  if os.path.exists(path):
    shutil.rmtree(path)

  os.makedirs(path)


def Main(argv):
  """Runs the hot functions processing for the given command line arguments.

  The three output directories (common functions, common functions groups and
  extra CWP functions groups) are deleted and recreated before processing, so
  any previous content in them is lost.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)

  for output_directory in (options.common_functions_path,
                           options.common_functions_groups_path,
                           options.extra_cwp_functions_groups_path):
    _RecreateDirectory(output_directory)

  hot_functions_processor = HotFunctionsProcessor(
      options.pprof_top_path, options.pprof_tree_path,
      options.cwp_inclusive_count_file,
      options.cwp_pairwise_inclusive_count_file,
      options.cwp_function_groups_file, options.common_functions_path,
      options.common_functions_groups_path, options.benchmark_set_metrics_file,
      options.extra_cwp_functions_file, options.extra_cwp_functions_groups_file,
      options.extra_cwp_functions_groups_path)

  hot_functions_processor.ProcessHotFunctions()


if __name__ == '__main__':
  Main(sys.argv[1:])