1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# Copyright 2018 The Chromium OS Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7"""Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles. 8 9AFDO sampling and ICF have an unfortunate interaction that causes a huge 10inflation in sample counts. Essentially, if you have N functions ICF'ed to the 11same location, one AFDO sample in any of those N functions will count as one 12sample in *each* of those N functions. 13 14In practice, there are a few forms of function bodies that are very heavily 15ICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types 16like std::map...). Recording 28,000 samples across all N thousand logical 17functions that point to the same body really hurts our AFDO numbers, given that 18our actual sample count across all of Chrome is something around 10,000,000. 19(No, really, these are actual numbers. In practice, at the time of writing, 20this script eliminates >90% of our AFDO samples by count. Sometimes as high as 2198%.) 22 23It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it 24to stdout. A summary of what the script actually did is printed to stderr. 25""" 26 27from __future__ import division, print_function 28 29import collections 30import re 31import sys 32 33 34def _count_samples(samples): 35 """Count the total number of samples in a function.""" 36 line_re = re.compile(r'^(\s*)\d+(?:\.\d+)?: (\d+)\s*$') 37 38 top_level_samples = 0 39 all_samples = 0 40 for line in samples: 41 m = line_re.match(line) 42 if not m: 43 continue 44 45 spaces, n = m.groups() 46 n = int(n) 47 all_samples += n 48 if len(spaces) == 1: 49 top_level_samples += n 50 51 return top_level_samples, all_samples 52 53 54# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO 55# profile. function_line is the top line of said function, and `samples` is 56# a list of all of the sample lines below that. 57# 58# We rely on the format of these strings in some places in this script. For 59# reference, a full function sample will look something like: 60# 61# _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185 62# 6: 83 63# 15: 126 64# 62832: 126 65# 6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349 66# 1: 206 67# 1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060 68# 1: 206 69# 11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800 70# 2.1: 80 71# 72# 73# In that case, function_line is 74# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be 75# every line below that. 76# 77# Function lines look like; 78# function_symbol:entry_count:dont_care 79# 80# And samples look like one of: 81# arbitrary_number: sample_count 82# arbitrary_number: inlined_function_symbol:inlined_entry_count 83ProfileRecord = collections.namedtuple('ProfileRecord', 84 ['function_line', 'samples']) 85 86 87def _normalize_samples(samples): 88 """Normalizes the samples in the given function body. 89 90 Normalization just means that we redact inlined function names. This is 91 done so that a bit of templating doesn't make two function bodies look 92 distinct. Namely: 93 94 template <typename T> 95 __attribute__((noinline)) 96 int getNumber() { return 1; } 97 98 template <typename T> 99 __attribute__((noinline)) 100 int getNumberIndirectly() { return getNumber<T>(); } 101 102 int main() { 103 return getNumber<int>() + getNumber<float>(); 104 } 105 106 If the profile has the mangled name for getNumber<float> in 107 getNumberIndirectly<float> (and similar for <int>), we'll consider them to 108 be distinct when they're not. 109 """ 110 111 # I'm not actually sure if this ends up being an issue in practice, but it's 112 # simple enough to guard against. 113 inlined_re = re.compile(r'(^\s*\d+): [^:]+:(\s*\d+)\s*$') 114 result = [] 115 for s in samples: 116 m = inlined_re.match(s) 117 if m: 118 result.append('%s: __REDACTED__:%s' % m.groups()) 119 else: 120 result.append(s) 121 return tuple(result) 122 123 124def _read_textual_afdo_profile(stream): 125 """Parses an AFDO profile from a line stream into ProfileRecords.""" 126 # ProfileRecords are actually nested, due to inlining. For the purpose of 127 # this script, that doesn't matter. 128 lines = (line.rstrip() for line in stream) 129 function_line = None 130 samples = [] 131 for line in lines: 132 if not line: 133 continue 134 135 if line[0].isspace(): 136 assert function_line is not None, 'sample exists outside of a function?' 137 samples.append(line) 138 continue 139 140 if function_line is not None: 141 yield ProfileRecord(function_line=function_line, samples=tuple(samples)) 142 function_line = line 143 samples = [] 144 145 if function_line is not None: 146 yield ProfileRecord(function_line=function_line, samples=tuple(samples)) 147 148 149# The default of 100 is arbitrarily selected, but it does make the overwhelming 150# majority of obvious sample duplication disappear. 151# 152# We experimented shortly with an nm-powered version of this script (rather 153# than structural matching, we'd see which functions mapped to the same literal 154# address). Running this with a high value (100) for max_repeats produced 155# results basically indistinguishable from nm, so ... 156# 157# Non-nm based approaches are superior because they don't require any prior 158# build artifacts; just an AFDO profile. 159def dedup_records(profile_records, summary_file, max_repeats=100): 160 """Removes heavily duplicated records from profile_records. 161 162 profile_records is expected to be an iterable of ProfileRecord. 163 max_repeats ia how many functions must share identical bodies for us to 164 consider it 'heavily duplicated' and remove the results. 165 """ 166 167 # Build a mapping of function structure -> list of functions with identical 168 # structure and sample counts 169 counts = collections.defaultdict(list) 170 for record in profile_records: 171 counts[_normalize_samples(record.samples)].append(record) 172 173 # Be sure that we didn't see any duplicate functions, since that's bad... 174 total_functions_recorded = sum(len(records) for records in counts.values()) 175 176 unique_function_names = { 177 record.function_line.split(':')[0] 178 for records in counts.values() 179 for record in records 180 } 181 182 assert len(unique_function_names) == total_functions_recorded, \ 183 'duplicate function names?' 184 185 num_kept = 0 186 num_samples_kept = 0 187 num_top_samples_kept = 0 188 num_total = 0 189 num_samples_total = 0 190 num_top_samples_total = 0 191 192 for normalized_samples, records in counts.items(): 193 top_sample_count, all_sample_count = _count_samples(normalized_samples) 194 top_sample_count *= len(records) 195 all_sample_count *= len(records) 196 197 num_total += len(records) 198 num_samples_total += all_sample_count 199 num_top_samples_total += top_sample_count 200 201 if len(records) >= max_repeats: 202 continue 203 204 num_kept += len(records) 205 num_samples_kept += all_sample_count 206 num_top_samples_kept += top_sample_count 207 for record in records: 208 yield record 209 210 print( 211 'Retained {:,}/{:,} functions'.format(num_kept, num_total), 212 file=summary_file) 213 print( 214 'Retained {:,}/{:,} samples, total'.format(num_samples_kept, 215 num_samples_total), 216 file=summary_file) 217 print('Retained {:,}/{:,} top-level samples' \ 218 .format(num_top_samples_kept, num_top_samples_total), 219 file=summary_file) 220 221 222def run(profile_input_file, summary_output_file, profile_output_file): 223 profile_records = _read_textual_afdo_profile(profile_input_file) 224 225 # Sort this so we get deterministic output. AFDO doesn't care what order it's 226 # in. 227 deduped = sorted( 228 dedup_records(profile_records, summary_output_file), 229 key=lambda r: r.function_line) 230 for function_line, samples in deduped: 231 print(function_line, file=profile_output_file) 232 print('\n'.join(samples), file=profile_output_file) 233 234 235def _main(): 236 run(profile_input_file=sys.stdin, 237 summary_output_file=sys.stderr, 238 profile_output_file=sys.stdout) 239 240 241if __name__ == '__main__': 242 _main() 243