1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""debug_unwind_reporter.py: report dwarf unwinding results generated by debug-unwind cmd.
19    Below is an example using debug_unwind_reporter.py:
20    1. Record with "-g --no-unwind" option on device.
21       simpleperf record -g --no-unwind --app com.google.sample.tunnel --duration 10
22    2. Use debug-unwind cmd to unwind samples in perf.data on device.
23       simpleperf debug-unwind
24    3. Pull perf.data.debug on host, and report it with this script.
25       python debug_unwind_reporter.py
26
27    It reports below items:
28    1. time used for offline dwarf unwinding for each sample.
29    2. mem consumption before and after unwinding samples.
30    3. For samples that don't have complete callchains, report their regs, stack data,
31       unwinding failure info, callchains. Samples having the same failure reason
32       in the same function are only reported once.
33
34    It can be used to:
35    1. show unwinding failures.
36    2. show performance and memory consumption change caused by simpleperf/unwinder changes.
37"""
38
39from __future__ import print_function
40import argparse
41import bisect
42import collections
43import copy
44import re
45import subprocess
46
47from utils import bytes_to_str, log_exit, log_fatal, get_host_binary_path
48
49
50class MapEntry(object):
51
52    def __init__(self, start, end, filename):
53        self.start = start
54        self.end = end
55        self.filename = filename
56
57    def __lt__(self, other):
58        return self.start < other.start
59
60class ProcessMaps(object):
61
62    def __init__(self):
63        self.process_maps = {}  # map from pid to a sorted list of MapEntry.
64
65    def add(self, pid, map_entry):
66        old_list = self.process_maps.get(pid, [])
67        new_list = []
68        map_entry_used = False
69        for entry in old_list:
70            if entry.end <= map_entry.start:
71                new_list.append(entry)
72            elif entry.start < map_entry.start:
73                entry.end = map_entry.start
74                new_list.append(entry)
75            else:
76                if not map_entry_used:
77                    new_list.append(map_entry)
78                    map_entry_used = True
79                if entry.start >= map_entry.end:
80                    new_list.append(entry)
81                elif entry.end > map_entry.end:
82                    entry.start = map_entry.end
83                    new_list.append(entry)
84        if not map_entry_used:
85            new_list.append(map_entry)
86        self.process_maps[pid] = new_list
87
88    def fork_pid(self, pid, ppid):
89        if pid == ppid:
90            return
91        entry_list = self.process_maps.get(ppid, [])
92        self.process_maps[pid] = copy.deepcopy(entry_list)
93
94    def find(self, pid, addr):
95        key = MapEntry(addr, addr, '')
96        entry_list = self.process_maps.get(pid, [])
97        pos = bisect.bisect_right(entry_list, key)
98        if pos > 0 and entry_list[pos - 1].end > addr:
99            return entry_list[pos - 1]
100        return None
101
102    def show(self):
103        for pid in sorted(self.process_maps):
104            print('  pid %d' % pid)
105            for entry in self.process_maps[pid]:
106                print('    map [%x-%x] %s' %
107                      (entry.start, entry.end, entry.filename))
108
109
110class UnwindingTimes(object):
111
112    def __init__(self):
113        self.total_time = 0
114        self.count = 0
115        self.max_time = 0
116
117    def add_time(self, used_time):
118        self.total_time += used_time
119        self.count += 1
120        self.max_time = max(self.max_time, used_time)
121
122
123class UnwindingMemConsumption(object):
124
125    def __init__(self):
126        self.before_unwinding = None
127        self.after_unwinding = None
128
129
130class CallChainNode(object):
131
132    """ Representing a node in a call chain."""
133
134    def __init__(self, ip, sp, filename, vaddr_in_file, function_name, map_start_addr,
135                 map_end_addr):
136        self.ip = ip
137        self.sp = sp
138        self.filename = filename
139        self.vaddr_in_file = vaddr_in_file
140        self.function_name = function_name
141        self.map_start_addr = map_start_addr
142        self.map_end_addr = map_end_addr
143
144
145class SampleResult(object):
146
147    """ Unwinding result per sample. """
148
149    def __init__(self, pid, tid, unwinding_result, callchain, sample_record):
150        self.pid = pid
151        self.tid = tid
152        self.unwinding_result = unwinding_result
153        self.callchain = callchain
154        self.sample_record = sample_record
155
156    def show(self):
157        print('  pid: %d' % self.pid)
158        print('  tid: %d' % self.tid)
159        for key, value in self.unwinding_result.items():
160            print('  %s: %s' % (key, value))
161        for i, node in enumerate(self.callchain):
162            print('  node %d: ip 0x%x, sp 0x%x, %s (%s[+%x]), map [%x-%x]' % (
163                i, node.ip, node.sp, node.function_name, node.filename, node.vaddr_in_file,
164                node.map_start_addr, node.map_end_addr))
165        if self.sample_record:
166            print('  original sample record:')
167            for line in self.sample_record:
168                print('    %s' % line)
169
170
171class FunctionResult(object):
172
173    """ Unwinding result per function. """
174
175    def __init__(self):
176        # Map from Unwinding result reason to [SampleResult].
177        self.sample_results = {}
178
179    def add_sample_result(self, sample_result):
180        stop_reason = sample_result.unwinding_result['stop_reason']
181        result_list = self.sample_results.get(stop_reason)
182        if not result_list:
183            result_list = self.sample_results[stop_reason] = []
184        for result in result_list:
185            if result.callchain[-1].vaddr_in_file == sample_result.callchain[-1].vaddr_in_file:
186                # This sample_result duplicates with an existing one.
187                return
188        # We don't want to store too many sample results for a function.
189        if len(result_list) < 10:
190            result_list.append(sample_result)
191
192    def show(self):
193        for stop_reason in sorted(self.sample_results):
194            for sample_result in self.sample_results[stop_reason]:
195                sample_result.show()
196
197
198class FileResult(object):
199
200    """ Unwinding result per shared library. """
201
202    def __init__(self):
203        self.function_results = {}  # Map from function_name to FunctionResult.
204
205    def add_sample_result(self, sample_result):
206        function_name = sample_result.callchain[-1].function_name
207        function_result = self.function_results.get(function_name)
208        if not function_result:
209            function_result = self.function_results[
210                function_name] = FunctionResult()
211        function_result.add_sample_result(sample_result)
212
213    def show(self):
214        for function_name in sorted(self.function_results):
215            print('  function %s' % function_name)
216            self.function_results[function_name].show()
217            print('\n')
218
219
220class UnwindingResultErrorReport(object):
221
222    """ Report time used for unwinding and unwinding result errors. """
223
224    def __init__(self, omit_callchains_fixed_by_joiner):
225        self.omit_callchains_fixed_by_joiner = omit_callchains_fixed_by_joiner
226        self.process_maps = ProcessMaps()
227        self.unwinding_times = UnwindingTimes()
228        self.mem_stat = UnwindingMemConsumption()
229        self.file_results = {}  # map from filename to FileResult.
230
231    def add_sample_result(self, sample_result, joined_record):
232        self.unwinding_times.add_time(int(sample_result.unwinding_result['used_time']))
233        if self.should_omit(sample_result, joined_record):
234            return
235        filename = sample_result.callchain[-1].filename
236        file_result = self.file_results.get(filename)
237        if not file_result:
238            file_result = self.file_results[filename] = FileResult()
239        file_result.add_sample_result(sample_result)
240
241    def add_mem_stat(self, name, mem_str):
242        # mem_str is like VmPeak:202464 kB;VmSize:183456 kB;VmHWM:98256 kB;VmRSS:33680 kB.
243        items = []
244        for p in mem_str.split(';'):
245            key, value = p.split(':')
246            items.append((key, value))
247        if name == 'debug_unwind_mem_before':
248            self.mem_stat.before_unwinding = items
249        else:
250            self.mem_stat.after_unwinding = items
251
252    def should_omit(self, sample_result, joined_record):
253        # 1. Can't unwind code generated in memory.
254        for name in ['/dev/ashmem/dalvik-jit-code-cache', '[anon:dalvik-jit-code-cache]', '//anon']:
255            if name in sample_result.callchain[-1].filename:
256                return True
257        # 2. Don't report complete callchains, which can reach __libc_init or __start_thread in
258        # libc.so.
259        def is_callchain_complete(callchain):
260            for node in callchain:
261                if (node.filename.endswith('libc.so') and
262                        node.function_name in ['__libc_init', '__start_thread']):
263                    return True
264            return False
265        if is_callchain_complete(sample_result.callchain):
266            return True
267        # 3. Omit callchains made complete by callchain joiner.
268        if self.omit_callchains_fixed_by_joiner and is_callchain_complete(joined_record.callchain):
269            return True
270        return False
271
272    def show(self):
273        print('Unwinding time info:')
274        print('  total time: %f ms' % (self.unwinding_times.total_time / 1e6))
275        print('  unwinding count: %d' % self.unwinding_times.count)
276        if self.unwinding_times.count > 0:
277            print('  average time: %f us' % (
278                self.unwinding_times.total_time / 1e3 / self.unwinding_times.count))
279        print('  max time: %f us' % (self.unwinding_times.max_time / 1e3))
280        print('Unwinding mem info:')
281        for items in zip(self.mem_stat.before_unwinding, self.mem_stat.after_unwinding):
282            assert items[0][0] == items[1][0]
283            print('  %s: %s -> %s' % (items[0][0], items[0][1], items[1][1]))
284        print('Process maps:')
285        self.process_maps.show()
286        for filename in sorted(self.file_results):
287            print('filename %s' % filename)
288            self.file_results[filename].show()
289            print('\n')
290
291
292class CallChainRecord(object):
293    """ Store data of a callchain record. """
294
295    def __init__(self):
296        self.pid = None
297        self.tid = None
298        self.callchain = []
299
300def parse_sample_record(lines, i):
301    """ Read the lines belong to a SampleRecord."""
302    if i == len(lines) or not lines[i].startswith('record sample:'):
303        log_fatal('unexpected dump output near line %d' % i)
304    start_line = i
305    i += 1
306    while i < len(lines) and (not lines[i] or lines[i].startswith(' ')):
307        i += 1
308    return i, lines[start_line:i]
309
310def parse_callchain_record(lines, i, chain_type, process_maps):
311    if i == len(lines) or not lines[i].startswith('record callchain:'):
312        log_fatal('unexpected dump output near line %d' % i)
313    i += 1
314    record = CallChainRecord()
315    ips = []
316    sps = []
317    function_names = []
318    filenames = []
319    vaddr_in_files = []
320    map_start_addrs = []
321    map_end_addrs = []
322    in_callchain = False
323    while i < len(lines) and not lines[i].startswith('record'):
324        line = lines[i].strip()
325        items = line.split()
326        if not items:
327            i += 1
328            continue
329        if items[0] == 'pid' and len(items) == 2:
330            record.pid = int(items[1])
331        elif items[0] == 'tid' and len(items) == 2:
332            record.tid = int(items[1])
333        elif items[0] == 'chain_type' and len(items) == 2:
334            if items[1] != chain_type:
335                log_fatal('unexpected dump output near line %d' % i)
336        elif items[0] == 'ip':
337            m = re.search(r'ip\s+0x(\w+),\s+sp\s+0x(\w+)$', line)
338            if m:
339                ips.append(int(m.group(1), 16))
340                sps.append(int(m.group(2), 16))
341        elif items[0] == 'callchain:':
342            in_callchain = True
343        elif in_callchain:
344            # "dalvik-jit-code-cache (deleted)[+346c] ([anon:dalvik-jit-code-cache]
345            #  (deleted)[+346c])"
346            if re.search(r'\)\[\+\w+\]\)$', line):
347                break_pos = line.rfind('(', 0, line.rfind('('))
348            else:
349                break_pos = line.rfind('(')
350            if break_pos > 0:
351                m = re.match(r'(.*)\[\+(\w+)\]\)', line[break_pos + 1:])
352                if m:
353                    function_names.append(line[:break_pos].strip())
354                    filenames.append(m.group(1))
355                    vaddr_in_files.append(int(m.group(2), 16))
356        i += 1
357
358    for ip in ips:
359        map_entry = process_maps.find(record.pid, ip)
360        if map_entry:
361            map_start_addrs.append(map_entry.start)
362            map_end_addrs.append(map_entry.end)
363        else:
364            map_start_addrs.append(0)
365            map_end_addrs.append(0)
366    n = len(ips)
367    if (None in [record.pid, record.tid] or n == 0 or len(sps) != n or
368            len(function_names) != n or len(filenames) != n or len(vaddr_in_files) != n or
369            len(map_start_addrs) != n or len(map_end_addrs) != n):
370        log_fatal('unexpected dump output near line %d' % i)
371    for j in range(n):
372        record.callchain.append(CallChainNode(ips[j], sps[j], filenames[j], vaddr_in_files[j],
373                                              function_names[j], map_start_addrs[j],
374                                              map_end_addrs[j]))
375    return i, record
376
377
378def build_unwinding_result_report(args):
379    simpleperf_path = get_host_binary_path('simpleperf')
380    proc = subprocess.Popen([simpleperf_path, 'dump', args.record_file[0]],
381                            stdout=subprocess.PIPE)
382    (stdoutdata, _) = proc.communicate()
383    stdoutdata = bytes_to_str(stdoutdata)
384    if 'debug_unwind = true' not in stdoutdata:
385        log_exit("Can't parse unwinding result. Because " +
386                 "%s was not generated by the debug-unwind cmd." % args.record_file[0])
387    unwinding_report = UnwindingResultErrorReport(args.omit_callchains_fixed_by_joiner)
388    process_maps = unwinding_report.process_maps
389    lines = stdoutdata.split('\n')
390    i = 0
391    while i < len(lines):
392        if lines[i].startswith('record mmap:') or lines[i].startswith('record mmap2:'):
393            i += 1
394            pid = None
395            start = None
396            end = None
397            filename = None
398            while i < len(lines) and not lines[i].startswith('record'):
399                if lines[i].startswith('  pid'):
400                    m = re.search(r'pid\s+(\d+).+addr\s+0x(\w+).+len\s+0x(\w+)', lines[i])
401                    if m:
402                        pid = int(m.group(1))
403                        start = int(m.group(2), 16)
404                        end = start + int(m.group(3), 16)
405                elif 'filename' in lines[i]:
406                    pos = lines[i].find('filename') + len('filename')
407                    filename = lines[i][pos:].strip()
408                i += 1
409            if None in [pid, start, end, filename]:
410                log_fatal('unexpected dump output near line %d' % i)
411            process_maps.add(pid, MapEntry(start, end, filename))
412        elif lines[i].startswith('record unwinding_result:'):
413            i += 1
414            unwinding_result = collections.OrderedDict()
415            while i < len(lines) and not lines[i].startswith('record'):
416                strs = (lines[i].strip()).split()
417                if len(strs) == 2:
418                    unwinding_result[strs[0]] = strs[1]
419                i += 1
420            for key in ['time', 'used_time', 'stop_reason']:
421                if key not in unwinding_result:
422                    log_fatal('unexpected dump output near line %d' % i)
423
424            i, sample_record = parse_sample_record(lines, i)
425            i, original_record = parse_callchain_record(lines, i, 'ORIGINAL_OFFLINE', process_maps)
426            i, joined_record = parse_callchain_record(lines, i, 'JOINED_OFFLINE', process_maps)
427            if args.omit_sample:
428                sample_record = []
429            sample_result = SampleResult(original_record.pid, original_record.tid,
430                                         unwinding_result, original_record.callchain,
431                                         sample_record)
432            unwinding_report.add_sample_result(sample_result, joined_record)
433        elif lines[i].startswith('record fork:'):
434            i += 1
435            pid = None
436            ppid = None
437            while i < len(lines) and not lines[i].startswith('record'):
438                if lines[i].startswith('  pid'):
439                    m = re.search(r'pid\s+(\w+),\s+ppid\s+(\w+)', lines[i])
440                    if m:
441                        pid = int(m.group(1))
442                        ppid = int(m.group(2))
443                i += 1
444            if None in [pid, ppid]:
445                log_fatal('unexpected dump output near line %d' % i)
446            process_maps.fork_pid(pid, ppid)
447        elif lines[i].startswith('    debug_unwind_mem'):
448            items = lines[i].strip().split(' = ')
449            if len(items) == 2:
450                unwinding_report.add_mem_stat(items[0], items[1])
451            i += 1
452        else:
453            i += 1
454    return unwinding_report
455
456
457def main():
458    parser = argparse.ArgumentParser(
459        description='Report dwarf unwinding results generated by the debug-unwind cmd.')
460    parser.add_argument('-i', '--record_file', nargs=1, default=['perf.data.debug'], help="""
461                        Set profiling data to report. Default is perf.data.debug.""")
462    parser.add_argument('--omit-callchains-fixed-by-joiner', action='store_true', help="""
463                        Don't show incomplete callchains fixed by callchain joiner.""")
464    parser.add_argument('--omit-sample', action='store_true', help="""Don't show original sample
465                        records.""")
466    args = parser.parse_args()
467    report = build_unwinding_result_report(args)
468    report.show()
469
470if __name__ == '__main__':
471    main()
472