1#!/usr/bin/env python3
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    python app_profiler.py
23    python pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27import argparse
28import os
29import os.path
30
31from simpleperf_report_lib import ReportLib
32from simpleperf_utils import (Addr2Nearestline, BinaryFinder, extant_dir,
33                              flatten_arg_list, log_info, log_exit, ReadElf, ToolFinder)
34try:
35    import profile_pb2
36except ImportError:
37    log_exit('google.protobuf module is missing. Please install it first.')
38
39
def load_pprof_profile(filename):
    """Read the file at `filename` and parse its bytes into a profile_pb2.Profile.

    Returns the populated Profile message.
    """
    loaded = profile_pb2.Profile()
    with open(filename, 'rb') as stream:
        serialized = stream.read()
    loaded.ParseFromString(serialized)
    return loaded
45
46
def store_pprof_profile(filename, profile):
    """Serialize `profile` and write the resulting bytes to `filename`.

    Args:
        filename: path of the file to create or overwrite.
        profile: object exposing SerializeToString() that returns bytes
            (a profile_pb2.Profile in this script).
    """
    # Serialize before opening the file: opening with 'wb' truncates it, so a
    # serialization failure would otherwise destroy a previously written profile.
    data = profile.SerializeToString()
    with open(filename, 'wb') as f:
        f.write(data)
50
51
class PprofProfilePrinter(object):
    """Print a human-readable dump of a pprof profile message to stdout.

    Ids stored in the profile (location_id, mapping_id, function_id) are
    resolved by position: id N is looked up at index N - 1 of the matching
    table. An id of 0 is treated as "unset" and nothing is printed for it.
    """

    def __init__(self, profile):
        """Keep `profile` and a shortcut to its string table."""
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, one table after another."""
        p = self.profile
        sub_space = '  '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Count header added so the function table matches the other tables.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, text in enumerate(p.string_table):
            print('string[%d]: %s' % (i, text))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment_id in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment_id)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType as both raw string ids and resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print a sample's resolved locations, its values and its labels."""
        sub_space = space + '  '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            # The original passed the arguments as a second print() argument
            # instead of formatting with '%', so the raw format string and a
            # tuple were printed. Format properly and show the label content.
            print('%slabel[%d] = Label(key=%s, str=%s, num=%d)' %
                  (space, i, self.string(label.key), self.string(label.str), label.num))

    def show_location_id(self, location_id, space=''):
        """Print the location referenced by `location_id`; skip id 0 (unset)."""
        if not location_id:
            # Index -1 would silently print the last table entry.
            return
        self.show_location(self.profile.location[location_id - 1], space)

    def show_location(self, location, space=''):
        """Print a location, the mapping it refers to and its line entries."""
        sub_space = space + '  '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping referenced by `mapping_id`; skip id 0 (unset)."""
        if not mapping_id:
            return
        self.show_mapping(self.profile.mapping[mapping_id - 1], space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of a mapping; string fields show text and raw id."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print a line entry together with the function it refers to."""
        sub_space = space + '  '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function referenced by `function_id`; skip id 0 (unset)."""
        if not function_id:
            return
        self.show_function(self.profile.function[function_id - 1], space)

    def show_function(self, function, space=''):
        """Print a function's id, resolved names, source file and start line."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string table entry at index `string_id`."""
        return self.string_table[string_id]
163
164
class Sample(object):
    """A call stack (list of location ids) plus per-sample-type accumulated values."""

    def __init__(self):
        # Maps sample_type_id -> accumulated value.
        self.values = {}
        # Location ids in the order they were added.
        self.location_ids = []

    def add_location_id(self, location_id):
        """Append one location id to the stack."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total kept for `sample_type_id`."""
        running_total = self.values.get(sample_type_id, 0)
        self.values[sample_type_id] = running_total + value

    def add_values(self, values):
        """Accumulate every (sample_type_id, value) pair of the dict `values`."""
        for type_id in values:
            self.add_value(type_id, values[type_id])

    @property
    def key(self):
        """Hashable identity of the sample: its location ids as a tuple."""
        return tuple(self.location_ids)
184
185
class Location(object):
    """An instruction address inside a mapping, with the Line entries attached to it."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.mapping_id, self.address = mapping_id, address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []
        # -1 marks a location that has not been assigned an id yet.
        self.id = -1

    @property
    def key(self):
        """Hashable identity of the location: (mapping_id, address)."""
        return self.mapping_id, self.address
198
199
class Line(object):
    """One line entry of a location: a function id and a source line number."""

    def __init__(self):
        # Both fields start at 0 and are filled in by the code creating the Line.
        self.line = self.function_id = 0
205
206
class Mapping(object):
    """A mapped memory range together with string ids of its file name and build id."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.memory_start, self.memory_limit = start, end
        self.file_offset = pgoff
        self.filename_id, self.build_id_id = filename_id, build_id_id
        # -1 marks a mapping that has not been assigned an id yet.
        self.id = -1

    @property
    def key(self):
        """Hashable identity of the mapping: every field except the id."""
        range_part = (self.memory_start, self.memory_limit, self.file_offset)
        name_part = (self.filename_id, self.build_id_id)
        return range_part + name_part
225
226
class Function(object):
    """A function identified by the string ids of its name and of its dso name."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.name_id, self.dso_name_id = name_id, dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        # Source information defaults to 0 until it is filled in elsewhere.
        self.start_line = 0
        self.source_filename_id = 0
        # -1 marks a function that has not been assigned an id yet.
        self.id = -1

    @property
    def key(self):
        """Hashable identity of the function: (name_id, dso_name_id)."""
        return self.name_id, self.dso_name_id
240
241
# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Aggregate samples from record files into a single pprof profile.

    Usage: construct with a config dict, call load_record_file() once per
    record file, then call gen() to obtain the populated profile_pb2.Profile.
    """

    def __init__(self, config):
        """Set up filters, lookup tables and the binary-finding helpers.

        Args:
            config: dict of options. 'max_chain_length' and 'ndk_path' must be
                present; 'comm_filters', 'pid_filters', 'tid_filters' and
                'dso_filters' are optional. config['binary_cache_dir'] is
                overwritten here: 'binary_cache' if that directory exists in
                the current working directory, otherwise None.
        """
        self.config = config
        self.lib = None

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string; get_string_id()
        # returns 0 for empty values.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])
        self.binary_finder = BinaryFinder(config['binary_cache_dir'], self.read_elf)

    def load_record_file(self, record_file):
        """Read all samples of `record_file` and merge them into the aggregate.

        The ReportLib instance is closed and self.lib reset to None when this
        method returns, including when an exception is raised while reading.
        """
        self.lib = ReportLib()
        try:
            self.lib.SetRecordFile(record_file)

            if self.config['binary_cache_dir']:
                self.lib.SetSymfs(self.config['binary_cache_dir'])
                kallsyms = os.path.join(self.config['binary_cache_dir'], 'kallsyms')
                if os.path.isfile(kallsyms):
                    self.lib.SetKallsymsFile(kallsyms)

            if self.config.get('show_art_frames'):
                self.lib.ShowArtFrames()
            # .get() so that a config without this optional key still works.
            for file_path in self.config.get('proguard_mapping_file') or []:
                self.lib.AddProguardMappingFile(file_path)

            # Process all samples in perf.data, aggregate samples.
            while True:
                report_sample = self.lib.GetNextSample()
                if report_sample is None:
                    break
                event = self.lib.GetEventOfCurrentSample()
                symbol = self.lib.GetSymbolOfCurrentSample()
                callchain = self.lib.GetCallChainOfCurrentSample()

                if not self._filter_report_sample(report_sample):
                    continue
                self._aggregate_report_sample(report_sample, event, symbol, callchain)
        finally:
            # Previously the lib was only closed after the last sample, so any
            # error while processing left it open and self.lib stale.
            self.lib.Close()
            self.lib = None

    def _aggregate_report_sample(self, report_sample, event, symbol, callchain):
        """Convert one report sample into a Sample and add it to the aggregate."""
        # The sample type is registered before the dso filter is applied, so an
        # event whose samples are all filtered out still gets its value columns.
        sample_type_id = self.get_sample_type_id(event.name)
        sample = Sample()
        sample.add_value(sample_type_id, 1)
        sample.add_value(sample_type_id + 1, report_sample.period)

        # The dso filter is evaluated on the sampled symbol only; the call
        # chain entries are kept or dropped together with it.
        # NOTE(review): the original re-evaluated this same condition for every
        # call chain entry. If filtering each entry by entry.symbol was the
        # intent, that is a behavior change to be decided separately.
        if not self._filter_symbol(symbol):
            return
        sample.add_location_id(self.get_location_id(report_sample.ip, symbol))
        # Only the last max_chain_length entries of the call chain are kept.
        first_entry = max(0, callchain.nr - self.max_chain_length)
        for i in range(first_entry, callchain.nr):
            entry = callchain.entries[i]
            sample.add_location_id(self.get_location_id(entry.ip, entry.symbol))
        self.add_sample(sample)

    def gen(self):
        """Fill self.profile from the aggregated data and return it."""
        # 1. Generate line info for locations and functions.
        self.gen_source_lines()

        # 2. Produce samples/locations/functions in profile.
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return True if there is no dso filter or symbol.dso_name is selected."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table index of `str_value`, adding it if new.

        Empty (falsy) values map to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # self.string_table does not contain the '' entry stored at index 0 of
        # the profile's table, hence the + 1.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile's string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the first value index used for event `name`.

        Each event gets two consecutive sample types: index N counts samples,
        index N + 1 accumulates the event count.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for `ip` in the symbol's mapping.

        The mapping and the function of `symbol` are registered as a side
        effect, even when the location itself already exists.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the mapping described by the arguments, adding it if new."""
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name.

        Results are cached in self.binary_map. Uses self.lib, so a record file
        must be loading when an uncached dso_name is requested.
        """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = ''

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        # So read build id from the binary in binary_cache, and check it with build id in
        # perf.data.
        build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = self.binary_finder.find_binary(dso_name, build_id_in_perf_data)
        if elf_path:
            build_id = build_id_in_perf_data
            binary_path = str(elf_path)

        # When there is no matching elf_path, try converting build_id in perf.data.
        if not build_id and build_id_in_perf_data.startswith('0x'):
            # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
            build_id = build_id_in_perf_data[2:]  # remove '0x'
            padding = '0' * 8
            while build_id.endswith(padding):
                build_id = build_id[:-len(padding)]

        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the Mapping with id `mapping_id`, or None for ids <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of function `name` in `dso_name`, adding it if new.

        Returns 0 (no function) when `name` is 'unknown'.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with id `function_id`, or None for ids <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing sample with the same key, or store it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Attach source file/line information to locations and functions.

        Does nothing (apart from logging) when binary_cache is missing or
        llvm-symbolizer cannot be found.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            log_info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to BinaryFinder.
        binary_finder = BinaryFinder(None, self.read_elf)
        addr2line = Addr2Nearestline(self.config['ndk_path'], binary_finder, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            if not location.lines:
                # No function is known for this location, nothing to look up.
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            function = self.get_function(location.lines[0].function_id)
            addr2line.add_addr(dso_name, None, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                # May register new functions (with vaddr 0) in self.function_list.
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            if not function.vaddr_in_dso:
                continue
            dso = addr2line.get_dso(self.get_string(function.dso_name_id))
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
            if sources:
                source_file, source_line, _ = sources[0]
                function.source_filename_id = self.get_string_id(source_file)
                function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for `function_id` at `source_line`.

        Also records `source_file` as the source file of that function.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile with one value per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event owns two value columns; columns the sample has no value
        # for stay 0.
        values = [0] * (len(self.sample_types) * 2)
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line numbers and inline frames are only claimed when a binary cache
        # is configured.
        has_line_info = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_line_info
        profile_mapping.has_inline_frames = has_line_info

    def gen_profile_location(self, location):
        """Append `location` and its line entries to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; system_name mirrors name."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
591
592
def main():
    """Command-line entry point.

    With --show, print an existing pprof profile and return. Otherwise convert
    the given record file(s) into a pprof profile and write it to the output file.
    """
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    add_option = parser.add_argument
    add_option('--show', nargs='?', action='append', help='print existing pprof.profile.')
    add_option('-i', '--record_file', nargs='+', default=['perf.data'],
               help='\n        Set profiling data file to report. Default is perf.data')
    add_option('-o', '--output_file', default='pprof.profile',
               help='\n        The path of generated pprof profile data.')
    add_option('--comm', nargs='+', action='append',
               help='\n        Use samples only in threads with selected names.')
    add_option('--pid', nargs='+', action='append',
               help='\n        Use samples only in processes with selected process ids.')
    add_option('--tid', nargs='+', action='append',
               help='\n        Use samples only in threads with selected thread ids.')
    add_option('--dso', nargs='+', action='append',
               help='\n        Use samples only in selected binaries.')
    # The large default stands in for "no limit".
    add_option('--max_chain_length', type=int, default=1000000000,
               help='\n        Maximum depth of samples to be converted.')
    add_option('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    add_option('--show_art_frames', action='store_true',
               help='Show frames of internal methods in the ART Java interpreter.')
    add_option('--proguard-mapping-file', nargs='+',
               help='Add proguard mapping file to de-obfuscate symbols')
    args = parser.parse_args()

    if args.show:
        # A falsy first entry (--show given without a path) selects the default file.
        path_to_show = args.show[0] or 'pprof.profile'
        PprofProfilePrinter(load_pprof_profile(path_to_show)).show()
        return

    config = {
        'output_file': args.output_file,
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
        'show_art_frames': args.show_art_frames,
        'max_chain_length': args.max_chain_length,
        'proguard_mapping_file': args.proguard_mapping_file,
    }
    generator = PprofProfileGenerator(config)
    for path in args.record_file:
        generator.load_record_file(path)
    store_pprof_profile(config['output_file'], generator.gen())
640
641
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
644