1#!/usr/bin/env python 2# 3# Copyright (C) 2017 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""debug_unwind_reporter.py: report dwarf unwinding results generated by debug-unwind cmd. 19 Below is an example using debug_unwind_reporter.py: 20 1. Record with "-g --no-unwind" option on device. 21 simpleperf record -g --no-unwind --app com.google.sample.tunnel --duration 10 22 2. Use debug-unwind cmd to unwind samples in perf.data on device. 23 simpleperf debug-unwind 24 3. Pull perf.data.debug on host, and report it with this script. 25 python debug_unwind_reporter.py 26 27 It reports below items: 28 1. time used for offline dwarf unwinding for each sample. 29 2. mem consumption before and after unwinding samples. 30 3. For samples that don't have complete callchains, report their regs, stack data, 31 unwinding failure info, callchains. Samples having the same failure reason 32 in the same function are only reported once. 33 34 It can be used to: 35 1. show unwinding failures. 36 2. show performance and memory consumption change caused by simpleperf/unwinder changes. 37""" 38 39from __future__ import print_function 40import argparse 41import bisect 42import collections 43import copy 44import re 45import subprocess 46 47from utils import bytes_to_str, log_exit, log_fatal, get_host_binary_path 48 49 50class MapEntry(object): 51 52 def __init__(self, start, end, filename): 53 self.start = start 54 self.end = end 55 self.filename = filename 56 57 def __lt__(self, other): 58 return self.start < other.start 59 60class ProcessMaps(object): 61 62 def __init__(self): 63 self.process_maps = {} # map from pid to a sorted list of MapEntry. 64 65 def add(self, pid, map_entry): 66 old_list = self.process_maps.get(pid, []) 67 new_list = [] 68 map_entry_used = False 69 for entry in old_list: 70 if entry.end <= map_entry.start: 71 new_list.append(entry) 72 elif entry.start < map_entry.start: 73 entry.end = map_entry.start 74 new_list.append(entry) 75 else: 76 if not map_entry_used: 77 new_list.append(map_entry) 78 map_entry_used = True 79 if entry.start >= map_entry.end: 80 new_list.append(entry) 81 elif entry.end > map_entry.end: 82 entry.start = map_entry.end 83 new_list.append(entry) 84 if not map_entry_used: 85 new_list.append(map_entry) 86 self.process_maps[pid] = new_list 87 88 def fork_pid(self, pid, ppid): 89 if pid == ppid: 90 return 91 entry_list = self.process_maps.get(ppid, []) 92 self.process_maps[pid] = copy.deepcopy(entry_list) 93 94 def find(self, pid, addr): 95 key = MapEntry(addr, addr, '') 96 entry_list = self.process_maps.get(pid, []) 97 pos = bisect.bisect_right(entry_list, key) 98 if pos > 0 and entry_list[pos - 1].end > addr: 99 return entry_list[pos - 1] 100 return None 101 102 def show(self): 103 for pid in sorted(self.process_maps): 104 print(' pid %d' % pid) 105 for entry in self.process_maps[pid]: 106 print(' map [%x-%x] %s' % 107 (entry.start, entry.end, entry.filename)) 108 109 110class UnwindingTimes(object): 111 112 def __init__(self): 113 self.total_time = 0 114 self.count = 0 115 self.max_time = 0 116 117 def add_time(self, used_time): 118 self.total_time += used_time 119 self.count += 1 120 self.max_time = max(self.max_time, used_time) 121 122 123class UnwindingMemConsumption(object): 124 125 def __init__(self): 126 self.before_unwinding = None 127 self.after_unwinding = None 128 129 130class CallChainNode(object): 131 132 """ Representing a node in a call chain.""" 133 134 def __init__(self, ip, sp, filename, vaddr_in_file, function_name, map_start_addr, 135 map_end_addr): 136 self.ip = ip 137 self.sp = sp 138 self.filename = filename 139 self.vaddr_in_file = vaddr_in_file 140 self.function_name = function_name 141 self.map_start_addr = map_start_addr 142 self.map_end_addr = map_end_addr 143 144 145class SampleResult(object): 146 147 """ Unwinding result per sample. """ 148 149 def __init__(self, pid, tid, unwinding_result, callchain, sample_record): 150 self.pid = pid 151 self.tid = tid 152 self.unwinding_result = unwinding_result 153 self.callchain = callchain 154 self.sample_record = sample_record 155 156 def show(self): 157 print(' pid: %d' % self.pid) 158 print(' tid: %d' % self.tid) 159 for key, value in self.unwinding_result.items(): 160 print(' %s: %s' % (key, value)) 161 for i, node in enumerate(self.callchain): 162 print(' node %d: ip 0x%x, sp 0x%x, %s (%s[+%x]), map [%x-%x]' % ( 163 i, node.ip, node.sp, node.function_name, node.filename, node.vaddr_in_file, 164 node.map_start_addr, node.map_end_addr)) 165 if self.sample_record: 166 print(' original sample record:') 167 for line in self.sample_record: 168 print(' %s' % line) 169 170 171class FunctionResult(object): 172 173 """ Unwinding result per function. """ 174 175 def __init__(self): 176 # Map from Unwinding result reason to [SampleResult]. 177 self.sample_results = {} 178 179 def add_sample_result(self, sample_result): 180 stop_reason = sample_result.unwinding_result['stop_reason'] 181 result_list = self.sample_results.get(stop_reason) 182 if not result_list: 183 result_list = self.sample_results[stop_reason] = [] 184 for result in result_list: 185 if result.callchain[-1].vaddr_in_file == sample_result.callchain[-1].vaddr_in_file: 186 # This sample_result duplicates with an existing one. 187 return 188 # We don't want to store too many sample results for a function. 189 if len(result_list) < 10: 190 result_list.append(sample_result) 191 192 def show(self): 193 for stop_reason in sorted(self.sample_results): 194 for sample_result in self.sample_results[stop_reason]: 195 sample_result.show() 196 197 198class FileResult(object): 199 200 """ Unwinding result per shared library. """ 201 202 def __init__(self): 203 self.function_results = {} # Map from function_name to FunctionResult. 204 205 def add_sample_result(self, sample_result): 206 function_name = sample_result.callchain[-1].function_name 207 function_result = self.function_results.get(function_name) 208 if not function_result: 209 function_result = self.function_results[ 210 function_name] = FunctionResult() 211 function_result.add_sample_result(sample_result) 212 213 def show(self): 214 for function_name in sorted(self.function_results): 215 print(' function %s' % function_name) 216 self.function_results[function_name].show() 217 print('\n') 218 219 220class UnwindingResultErrorReport(object): 221 222 """ Report time used for unwinding and unwinding result errors. """ 223 224 def __init__(self, omit_callchains_fixed_by_joiner): 225 self.omit_callchains_fixed_by_joiner = omit_callchains_fixed_by_joiner 226 self.process_maps = ProcessMaps() 227 self.unwinding_times = UnwindingTimes() 228 self.mem_stat = UnwindingMemConsumption() 229 self.file_results = {} # map from filename to FileResult. 230 231 def add_sample_result(self, sample_result, joined_record): 232 self.unwinding_times.add_time(int(sample_result.unwinding_result['used_time'])) 233 if self.should_omit(sample_result, joined_record): 234 return 235 filename = sample_result.callchain[-1].filename 236 file_result = self.file_results.get(filename) 237 if not file_result: 238 file_result = self.file_results[filename] = FileResult() 239 file_result.add_sample_result(sample_result) 240 241 def add_mem_stat(self, name, mem_str): 242 # mem_str is like VmPeak:202464 kB;VmSize:183456 kB;VmHWM:98256 kB;VmRSS:33680 kB. 243 items = [] 244 for p in mem_str.split(';'): 245 key, value = p.split(':') 246 items.append((key, value)) 247 if name == 'debug_unwind_mem_before': 248 self.mem_stat.before_unwinding = items 249 else: 250 self.mem_stat.after_unwinding = items 251 252 def should_omit(self, sample_result, joined_record): 253 # 1. Can't unwind code generated in memory. 254 for name in ['/dev/ashmem/dalvik-jit-code-cache', '[anon:dalvik-jit-code-cache]', '//anon']: 255 if name in sample_result.callchain[-1].filename: 256 return True 257 # 2. Don't report complete callchains, which can reach __libc_init or __start_thread in 258 # libc.so. 259 def is_callchain_complete(callchain): 260 for node in callchain: 261 if (node.filename.endswith('libc.so') and 262 node.function_name in ['__libc_init', '__start_thread']): 263 return True 264 return False 265 if is_callchain_complete(sample_result.callchain): 266 return True 267 # 3. Omit callchains made complete by callchain joiner. 268 if self.omit_callchains_fixed_by_joiner and is_callchain_complete(joined_record.callchain): 269 return True 270 return False 271 272 def show(self): 273 print('Unwinding time info:') 274 print(' total time: %f ms' % (self.unwinding_times.total_time / 1e6)) 275 print(' unwinding count: %d' % self.unwinding_times.count) 276 if self.unwinding_times.count > 0: 277 print(' average time: %f us' % ( 278 self.unwinding_times.total_time / 1e3 / self.unwinding_times.count)) 279 print(' max time: %f us' % (self.unwinding_times.max_time / 1e3)) 280 print('Unwinding mem info:') 281 for items in zip(self.mem_stat.before_unwinding, self.mem_stat.after_unwinding): 282 assert items[0][0] == items[1][0] 283 print(' %s: %s -> %s' % (items[0][0], items[0][1], items[1][1])) 284 print('Process maps:') 285 self.process_maps.show() 286 for filename in sorted(self.file_results): 287 print('filename %s' % filename) 288 self.file_results[filename].show() 289 print('\n') 290 291 292class CallChainRecord(object): 293 """ Store data of a callchain record. """ 294 295 def __init__(self): 296 self.pid = None 297 self.tid = None 298 self.callchain = [] 299 300def parse_sample_record(lines, i): 301 """ Read the lines belong to a SampleRecord.""" 302 if i == len(lines) or not lines[i].startswith('record sample:'): 303 log_fatal('unexpected dump output near line %d' % i) 304 start_line = i 305 i += 1 306 while i < len(lines) and (not lines[i] or lines[i].startswith(' ')): 307 i += 1 308 return i, lines[start_line:i] 309 310def parse_callchain_record(lines, i, chain_type, process_maps): 311 if i == len(lines) or not lines[i].startswith('record callchain:'): 312 log_fatal('unexpected dump output near line %d' % i) 313 i += 1 314 record = CallChainRecord() 315 ips = [] 316 sps = [] 317 function_names = [] 318 filenames = [] 319 vaddr_in_files = [] 320 map_start_addrs = [] 321 map_end_addrs = [] 322 in_callchain = False 323 while i < len(lines) and not lines[i].startswith('record'): 324 line = lines[i].strip() 325 items = line.split() 326 if not items: 327 i += 1 328 continue 329 if items[0] == 'pid' and len(items) == 2: 330 record.pid = int(items[1]) 331 elif items[0] == 'tid' and len(items) == 2: 332 record.tid = int(items[1]) 333 elif items[0] == 'chain_type' and len(items) == 2: 334 if items[1] != chain_type: 335 log_fatal('unexpected dump output near line %d' % i) 336 elif items[0] == 'ip': 337 m = re.search(r'ip\s+0x(\w+),\s+sp\s+0x(\w+)$', line) 338 if m: 339 ips.append(int(m.group(1), 16)) 340 sps.append(int(m.group(2), 16)) 341 elif items[0] == 'callchain:': 342 in_callchain = True 343 elif in_callchain: 344 # "dalvik-jit-code-cache (deleted)[+346c] ([anon:dalvik-jit-code-cache] 345 # (deleted)[+346c])" 346 if re.search(r'\)\[\+\w+\]\)$', line): 347 break_pos = line.rfind('(', 0, line.rfind('(')) 348 else: 349 break_pos = line.rfind('(') 350 if break_pos > 0: 351 m = re.match(r'(.*)\[\+(\w+)\]\)', line[break_pos + 1:]) 352 if m: 353 function_names.append(line[:break_pos].strip()) 354 filenames.append(m.group(1)) 355 vaddr_in_files.append(int(m.group(2), 16)) 356 i += 1 357 358 for ip in ips: 359 map_entry = process_maps.find(record.pid, ip) 360 if map_entry: 361 map_start_addrs.append(map_entry.start) 362 map_end_addrs.append(map_entry.end) 363 else: 364 map_start_addrs.append(0) 365 map_end_addrs.append(0) 366 n = len(ips) 367 if (None in [record.pid, record.tid] or n == 0 or len(sps) != n or 368 len(function_names) != n or len(filenames) != n or len(vaddr_in_files) != n or 369 len(map_start_addrs) != n or len(map_end_addrs) != n): 370 log_fatal('unexpected dump output near line %d' % i) 371 for j in range(n): 372 record.callchain.append(CallChainNode(ips[j], sps[j], filenames[j], vaddr_in_files[j], 373 function_names[j], map_start_addrs[j], 374 map_end_addrs[j])) 375 return i, record 376 377 378def build_unwinding_result_report(args): 379 simpleperf_path = get_host_binary_path('simpleperf') 380 proc = subprocess.Popen([simpleperf_path, 'dump', args.record_file[0]], 381 stdout=subprocess.PIPE) 382 (stdoutdata, _) = proc.communicate() 383 stdoutdata = bytes_to_str(stdoutdata) 384 if 'debug_unwind = true' not in stdoutdata: 385 log_exit("Can't parse unwinding result. Because " + 386 "%s was not generated by the debug-unwind cmd." % args.record_file[0]) 387 unwinding_report = UnwindingResultErrorReport(args.omit_callchains_fixed_by_joiner) 388 process_maps = unwinding_report.process_maps 389 lines = stdoutdata.split('\n') 390 i = 0 391 while i < len(lines): 392 if lines[i].startswith('record mmap:') or lines[i].startswith('record mmap2:'): 393 i += 1 394 pid = None 395 start = None 396 end = None 397 filename = None 398 while i < len(lines) and not lines[i].startswith('record'): 399 if lines[i].startswith(' pid'): 400 m = re.search(r'pid\s+(\d+).+addr\s+0x(\w+).+len\s+0x(\w+)', lines[i]) 401 if m: 402 pid = int(m.group(1)) 403 start = int(m.group(2), 16) 404 end = start + int(m.group(3), 16) 405 elif 'filename' in lines[i]: 406 pos = lines[i].find('filename') + len('filename') 407 filename = lines[i][pos:].strip() 408 i += 1 409 if None in [pid, start, end, filename]: 410 log_fatal('unexpected dump output near line %d' % i) 411 process_maps.add(pid, MapEntry(start, end, filename)) 412 elif lines[i].startswith('record unwinding_result:'): 413 i += 1 414 unwinding_result = collections.OrderedDict() 415 while i < len(lines) and not lines[i].startswith('record'): 416 strs = (lines[i].strip()).split() 417 if len(strs) == 2: 418 unwinding_result[strs[0]] = strs[1] 419 i += 1 420 for key in ['time', 'used_time', 'stop_reason']: 421 if key not in unwinding_result: 422 log_fatal('unexpected dump output near line %d' % i) 423 424 i, sample_record = parse_sample_record(lines, i) 425 i, original_record = parse_callchain_record(lines, i, 'ORIGINAL_OFFLINE', process_maps) 426 i, joined_record = parse_callchain_record(lines, i, 'JOINED_OFFLINE', process_maps) 427 if args.omit_sample: 428 sample_record = [] 429 sample_result = SampleResult(original_record.pid, original_record.tid, 430 unwinding_result, original_record.callchain, 431 sample_record) 432 unwinding_report.add_sample_result(sample_result, joined_record) 433 elif lines[i].startswith('record fork:'): 434 i += 1 435 pid = None 436 ppid = None 437 while i < len(lines) and not lines[i].startswith('record'): 438 if lines[i].startswith(' pid'): 439 m = re.search(r'pid\s+(\w+),\s+ppid\s+(\w+)', lines[i]) 440 if m: 441 pid = int(m.group(1)) 442 ppid = int(m.group(2)) 443 i += 1 444 if None in [pid, ppid]: 445 log_fatal('unexpected dump output near line %d' % i) 446 process_maps.fork_pid(pid, ppid) 447 elif lines[i].startswith(' debug_unwind_mem'): 448 items = lines[i].strip().split(' = ') 449 if len(items) == 2: 450 unwinding_report.add_mem_stat(items[0], items[1]) 451 i += 1 452 else: 453 i += 1 454 return unwinding_report 455 456 457def main(): 458 parser = argparse.ArgumentParser( 459 description='Report dwarf unwinding results generated by the debug-unwind cmd.') 460 parser.add_argument('-i', '--record_file', nargs=1, default=['perf.data.debug'], help=""" 461 Set profiling data to report. Default is perf.data.debug.""") 462 parser.add_argument('--omit-callchains-fixed-by-joiner', action='store_true', help=""" 463 Don't show incomplete callchains fixed by callchain joiner.""") 464 parser.add_argument('--omit-sample', action='store_true', help="""Don't show original sample 465 records.""") 466 args = parser.parse_args() 467 report = build_unwinding_result_report(args) 468 report.show() 469 470if __name__ == '__main__': 471 main() 472