1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""annotate.py: annotate source files based on perf.data.
19"""
20
21
22import argparse
23import os
24import os.path
25import shutil
26
27from simpleperf_report_lib import ReportLib
28from simpleperf_utils import (
29    Addr2Nearestline, BinaryFinder, extant_dir, flatten_arg_list, is_windows, log_exit, log_info,
30    log_warning, ReadElf, SourceFileSearcher)
31
32
class SourceLine(object):
    """One resolved source location: a file, a function name and a line.

    The ``*_key`` properties expose the values used to tell apart hits on
    the same file, the same function of a file, or the same line of a file.
    """

    def __init__(self, file_id, function, line):
        self.file, self.function, self.line = file_id, function, line

    @property
    def file_key(self):
        """Key identifying the source file (the file itself)."""
        return self.file

    @property
    def function_key(self):
        """Key identifying the function: a ``(file, function)`` tuple."""
        file_id, function_name = self.file, self.function
        return (file_id, function_name)

    @property
    def line_key(self):
        """Key identifying the line: a ``(file, line)`` tuple."""
        file_id, line_number = self.file, self.line
        return (file_id, line_number)
50
51
class Addr2Line(object):
    """Collect the information needed to map [dso_name, vaddr] to
    [source_file:line].

    The actual address conversion is delegated to an Addr2Nearestline
    object; source file names it reports are looked up through a
    SourceFileSearcher built from the given source directories.
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        readelf = ReadElf(ndk_path)
        finder = BinaryFinder(binary_cache_path, readelf)
        self.addr2line = Addr2Nearestline(ndk_path, finder, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int):
        """Register an address of a binary for later conversion.

        Note that build_id is accepted but not forwarded to the backend.
        """
        self.addr2line.add_addr(dso_path, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Forward the conversion request to the Addr2Nearestline backend."""
        self.addr2line.convert_addrs_to_lines()

    def get_sources(self, dso_path, addr):
        """Return a list of SourceLine objects for addr in dso_path.

        The list is empty when the backend knows nothing about the binary
        or the address. A source file name is replaced by the path found by
        the source searcher; when nothing is found the reported name is kept.
        """
        dso = self.addr2line.get_dso(dso_path)
        entries = self.addr2line.get_addr_source(dso, addr) if dso else None
        if not entries:
            return []
        find_real_path = self.source_searcher.get_real_path
        return [
            SourceLine(find_real_path(reported_file) or reported_file, function_name, line_number)
            for (reported_file, line_number, function_name) in entries
        ]
81
82
class Period(object):
    """Event count information for a line, a function, a source file, or
       a binary. It holds two counters: period and acc_period.
       For a line, period is the event count that occurred while running
       that line, and acc_period is the accumulated event count that
       occurred while running that line and the functions it called. The
       same meaning applies to a function, a source file, or a binary.
    """

    def __init__(self, period=0, acc_period=0):
        self.period, self.acc_period = period, acc_period

    def __iadd__(self, other):
        """Add both counters of other to this object and return self."""
        self.acc_period += other.acc_period
        self.period += other.period
        return self
101
102
class DsoPeriod(object):
    """Accumulated Period of one shared library, identified by dso_name."""

    def __init__(self, dso_name):
        self.period = Period()
        self.dso_name = dso_name

    def add_period(self, period):
        """Accumulate the given period into this library's total."""
        self.period += period
112
113
class FilePeriod(object):
    """Period of one source file, plus per-line and per-function Periods."""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # line number -> Period of that line.
        self.line_dict = {}
        # function name -> [function start line, Period of that function].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate period into the total of the whole file."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate period into the entry of line, creating it on demand."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            line_period = Period()
            self.line_dict[line] = line_period
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry of function_name.

        The entry is created on first use; its start line is recorded then
        and stored as -1 when function_start_line is None.
        """
        record = self.function_dict.get(function_name)
        if not record:
            start_line = -1 if function_start_line is None else function_start_line
            record = [start_line, Period()]
            self.function_dict[function_name] = record
        record[1] += period
141
142
143class SourceFileAnnotator(object):
144    """group code for annotating source files"""
145
146    def __init__(self, config):
147        # check config variables
148        config_names = ['perf_data_list', 'source_dirs', 'comm_filters',
149                        'pid_filters', 'tid_filters', 'dso_filters', 'ndk_path']
150        for name in config_names:
151            if name not in config:
152                log_exit('config [%s] is missing' % name)
153        symfs_dir = 'binary_cache'
154        if not os.path.isdir(symfs_dir):
155            symfs_dir = None
156        kallsyms = 'binary_cache/kallsyms'
157        if not os.path.isfile(kallsyms):
158            kallsyms = None
159
160        # init member variables
161        self.config = config
162        self.symfs_dir = symfs_dir
163        self.kallsyms = kallsyms
164        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
165        if config.get('pid_filters'):
166            self.pid_filter = {int(x) for x in config['pid_filters']}
167        else:
168            self.pid_filter = None
169        if config.get('tid_filters'):
170            self.tid_filter = {int(x) for x in config['tid_filters']}
171        else:
172            self.tid_filter = None
173        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
174
175        config['annotate_dest_dir'] = 'annotated_files'
176        output_dir = config['annotate_dest_dir']
177        if os.path.isdir(output_dir):
178            shutil.rmtree(output_dir)
179        os.makedirs(output_dir)
180
181        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
182        self.period = 0
183        self.dso_periods = {}
184        self.file_periods = {}
185
186    def annotate(self):
187        self._collect_addrs()
188        self._convert_addrs_to_lines()
189        self._generate_periods()
190        self._write_summary()
191        self._annotate_files()
192
193    def _collect_addrs(self):
194        """Read perf.data, collect all addresses we need to convert to
195           source file:line.
196        """
197        for perf_data in self.config['perf_data_list']:
198            lib = ReportLib()
199            lib.SetRecordFile(perf_data)
200            if self.symfs_dir:
201                lib.SetSymfs(self.symfs_dir)
202            if self.kallsyms:
203                lib.SetKallsymsFile(self.kallsyms)
204            while True:
205                sample = lib.GetNextSample()
206                if sample is None:
207                    lib.Close()
208                    break
209                if not self._filter_sample(sample):
210                    continue
211                symbols = []
212                symbols.append(lib.GetSymbolOfCurrentSample())
213                callchain = lib.GetCallChainOfCurrentSample()
214                for i in range(callchain.nr):
215                    symbols.append(callchain.entries[i].symbol)
216                for symbol in symbols:
217                    if self._filter_symbol(symbol):
218                        build_id = lib.GetBuildIdForPath(symbol.dso_name)
219                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
220                                                symbol.vaddr_in_file)
221                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
222                                                symbol.symbol_addr)
223
224    def _filter_sample(self, sample):
225        """Return true if the sample can be used."""
226        if self.comm_filter:
227            if sample.thread_comm not in self.comm_filter:
228                return False
229        if self.pid_filter:
230            if sample.pid not in self.pid_filter:
231                return False
232        if self.tid_filter:
233            if sample.tid not in self.tid_filter:
234                return False
235        return True
236
237    def _filter_symbol(self, symbol):
238        if not self.dso_filter or symbol.dso_name in self.dso_filter:
239            return True
240        return False
241
    def _convert_addrs_to_lines(self):
        """Delegate to self.addr2line.convert_addrs_to_lines()."""
        self.addr2line.convert_addrs_to_lines()
244
245    def _generate_periods(self):
246        """read perf.data, collect Period for all types:
247            binaries, source files, functions, lines.
248        """
249        for perf_data in self.config['perf_data_list']:
250            lib = ReportLib()
251            lib.SetRecordFile(perf_data)
252            if self.symfs_dir:
253                lib.SetSymfs(self.symfs_dir)
254            if self.kallsyms:
255                lib.SetKallsymsFile(self.kallsyms)
256            while True:
257                sample = lib.GetNextSample()
258                if sample is None:
259                    lib.Close()
260                    break
261                if not self._filter_sample(sample):
262                    continue
263                self._generate_periods_for_sample(lib, sample)
264
265    def _generate_periods_for_sample(self, lib, sample):
266        symbols = []
267        symbols.append(lib.GetSymbolOfCurrentSample())
268        callchain = lib.GetCallChainOfCurrentSample()
269        for i in range(callchain.nr):
270            symbols.append(callchain.entries[i].symbol)
271        # Each sample has a callchain, but its period is only used once
272        # to add period for each function/source_line/source_file/binary.
273        # For example, if more than one entry in the callchain hits a
274        # function, the event count of that function is only increased once.
275        # Otherwise, we may get periods > 100%.
276        is_sample_used = False
277        used_dso_dict = {}
278        used_file_dict = {}
279        used_function_dict = {}
280        used_line_dict = {}
281        period = Period(sample.period, sample.period)
282        for j, symbol in enumerate(symbols):
283            if j == 1:
284                period = Period(0, sample.period)
285            if not self._filter_symbol(symbol):
286                continue
287            is_sample_used = True
288            # Add period to dso.
289            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
290            # Add period to source file.
291            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
292            for source in sources:
293                if source.file:
294                    self._add_file_period(source, period, used_file_dict)
295                    # Add period to line.
296                    if source.line:
297                        self._add_line_period(source, period, used_line_dict)
298            # Add period to function.
299            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
300            for source in sources:
301                if source.file:
302                    self._add_file_period(source, period, used_file_dict)
303                    if source.function:
304                        self._add_function_period(source, period, used_function_dict)
305
306        if is_sample_used:
307            self.period += sample.period
308
309    def _add_dso_period(self, dso_name, period, used_dso_dict):
310        if dso_name not in used_dso_dict:
311            used_dso_dict[dso_name] = True
312            dso_period = self.dso_periods.get(dso_name)
313            if dso_period is None:
314                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
315            dso_period.add_period(period)
316
317    def _add_file_period(self, source, period, used_file_dict):
318        if source.file_key not in used_file_dict:
319            used_file_dict[source.file_key] = True
320            file_period = self.file_periods.get(source.file)
321            if file_period is None:
322                file_period = self.file_periods[source.file] = FilePeriod(source.file)
323            file_period.add_period(period)
324
325    def _add_line_period(self, source, period, used_line_dict):
326        if source.line_key not in used_line_dict:
327            used_line_dict[source.line_key] = True
328            file_period = self.file_periods[source.file]
329            file_period.add_line_period(source.line, period)
330
331    def _add_function_period(self, source, period, used_function_dict):
332        if source.function_key not in used_function_dict:
333            used_function_dict[source.function_key] = True
334            file_period = self.file_periods[source.file]
335            file_period.add_function_period(source.function, source.line, period)
336
337    def _write_summary(self):
338        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
339        with open(summary, 'w') as f:
340            f.write('total period: %d\n\n' % self.period)
341            dso_periods = sorted(self.dso_periods.values(),
342                                 key=lambda x: x.period.acc_period, reverse=True)
343            for dso_period in dso_periods:
344                f.write('dso %s: %s\n' % (dso_period.dso_name,
345                                          self._get_percentage_str(dso_period.period)))
346            f.write('\n')
347
348            file_periods = sorted(self.file_periods.values(),
349                                  key=lambda x: x.period.acc_period, reverse=True)
350            for file_period in file_periods:
351                f.write('file %s: %s\n' % (file_period.file,
352                                           self._get_percentage_str(file_period.period)))
353            for file_period in file_periods:
354                f.write('\n\n%s: %s\n' % (file_period.file,
355                                          self._get_percentage_str(file_period.period)))
356                values = []
357                for func_name in file_period.function_dict.keys():
358                    func_start_line, period = file_period.function_dict[func_name]
359                    values.append((func_name, func_start_line, period))
360                values = sorted(values, key=lambda x: x[2].acc_period, reverse=True)
361                for value in values:
362                    f.write('\tfunction (%s): line %d, %s\n' % (
363                        value[0], value[1], self._get_percentage_str(value[2])))
364                f.write('\n')
365                for line in sorted(file_period.line_dict.keys()):
366                    f.write('\tline %d: %s\n' % (
367                        line, self._get_percentage_str(file_period.line_dict[line])))
368
369    def _get_percentage_str(self, period, short=False):
370        s = 'acc_p: %f%%, p: %f%%' if short else 'accumulated_period: %f%%, period: %f%%'
371        return s % self._get_percentage(period)
372
373    def _get_percentage(self, period):
374        if self.period == 0:
375            return (0, 0)
376        acc_p = 100.0 * period.acc_period / self.period
377        p = 100.0 * period.period / self.period
378        return (acc_p, p)
379
380    def _annotate_files(self):
381        """Annotate Source files: add acc_period/period for each source file.
382           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
383           2. Annotate c++ source files.
384        """
385        dest_dir = self.config['annotate_dest_dir']
386        for key in self.file_periods:
387            from_path = key
388            if not os.path.isfile(from_path):
389                log_warning("can't find source file for path %s" % from_path)
390                continue
391            if from_path.startswith('/'):
392                to_path = os.path.join(dest_dir, from_path[1:])
393            elif is_windows() and ':\\' in from_path:
394                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
395            else:
396                to_path = os.path.join(dest_dir, from_path)
397            is_java = from_path.endswith('.java')
398            self._annotate_file(from_path, to_path, self.file_periods[key], is_java)
399
400    def _annotate_file(self, from_path, to_path, file_period, is_java):
401        """Annotate a source file.
402
403        Annotate a source file in three steps:
404          1. In the first line, show periods of this file.
405          2. For each function, show periods of this function.
406          3. For each line not hitting the same line as functions, show
407             line periods.
408        """
409        log_info('annotate file %s' % from_path)
410        with open(from_path, 'r') as rf:
411            lines = rf.readlines()
412
413        annotates = {}
414        for line in file_period.line_dict.keys():
415            annotates[line] = self._get_percentage_str(file_period.line_dict[line], True)
416        for func_name in file_period.function_dict.keys():
417            func_start_line, period = file_period.function_dict[func_name]
418            if func_start_line == -1:
419                continue
420            line = func_start_line - 1 if is_java else func_start_line
421            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
422        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)
423
424        max_annotate_cols = 0
425        for key in annotates:
426            max_annotate_cols = max(max_annotate_cols, len(annotates[key]))
427
428        empty_annotate = ' ' * (max_annotate_cols + 6)
429
430        dirname = os.path.dirname(to_path)
431        if not os.path.isdir(dirname):
432            os.makedirs(dirname)
433        with open(to_path, 'w') as wf:
434            for line in range(1, len(lines) + 1):
435                annotate = annotates.get(line)
436                if annotate is None:
437                    if not lines[line-1].strip():
438                        annotate = ''
439                    else:
440                        annotate = empty_annotate
441                else:
442                    annotate = '/* ' + annotate + (
443                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
444                wf.write(annotate)
445                wf.write(lines[line-1])
446
447
def main():
    """Parse the command line, build the config and run the annotator."""
    parser = argparse.ArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")

    def add_list_option(*flags, **kwargs):
        """Add an option taking one or more values that may be repeated."""
        parser.add_argument(*flags, nargs='+', action='append', **kwargs)

    add_list_option('-i', '--perf_data_list', help="""
        The paths of profiling data. Default is perf.data.""")
    add_list_option('-s', '--source_dirs', type=extant_dir, help="""
        Directories to find source files.""")
    add_list_option('--comm', help="""
        Use samples only in threads with selected names.""")
    add_list_option('--pid', help="""
        Use samples only in processes with selected process ids.""")
    add_list_option('--tid', help="""
        Use samples only in threads with selected thread ids.""")
    add_list_option('--dso', help="""
        Use samples only in selected binaries.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')

    args = parser.parse_args()

    perf_data_list = flatten_arg_list(args.perf_data_list)
    if not perf_data_list:
        # Fall back to the default recording file name.
        perf_data_list.append('perf.data')
    config = {
        'perf_data_list': perf_data_list,
        'source_dirs': flatten_arg_list(args.source_dirs),
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
    }

    SourceFileAnnotator(config).annotate()
    log_info('annotate finish successfully, please check result in annotated_files/.')
482
483
# Run the annotator only when this file is executed as a script.
if __name__ == '__main__':
    main()
486