1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
19   Used to access samples in perf.data.
20
21"""
22
23import collections
24import ctypes as ct
25import struct
26from utils import bytes_to_str, get_host_binary_path, is_windows, str_to_bytes
27
28
def _get_native_lib():
    """ Return the path of libsimpleperf_report.so as resolved by get_host_binary_path(). """
    lib_name = 'libsimpleperf_report.so'
    return get_host_binary_path(lib_name)
31
32
def _is_null(p):
    """ Return True if p is a null pointer.
        p: a value returned by a native function (a ctypes pointer, bytes, or None).
        A truthy p is never null. A falsy p is cast to void* and counts as null only when
        the resulting address is None.
    """
    return False if p else ct.cast(p, ct.c_void_p).value is None
37
38
def _char_pt(s):
    """ Convert string s with str_to_bytes() so it can be passed to a native function. """
    encoded = str_to_bytes(s)
    return encoded
41
42
def _char_pt_to_str(char_pt):
    """ Convert char_pt, a value read from a native char* field, with bytes_to_str(). """
    text = bytes_to_str(char_pt)
    return text
45
def _check(cond, failmsg):
    """ Raise RuntimeError(failmsg) unless cond is truthy. """
    if cond:
        return
    raise RuntimeError(failmsg)
49
50
class SampleStruct(ct.Structure):
    """ One sample stored in perf.data.
        ip: program counter of the thread that generated the sample.
        pid: process id (thread group id) of that thread.
        tid: thread id.
        thread_comm: thread name.
        time: when the sample was generated, in nanoseconds. Which clock is used is decided
              by the --clockid option in `simpleperf record`.
        in_kernel: whether the instruction is in kernel space or user space.
        cpu: the cpu that generated the sample.
        period: how many events have happened since the previous sample. With
             -e cpu-cycles it is a number of cpu cycles; with -e cpu-clock it is a number
             of nanoseconds.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('pid', ct.c_uint32),
        ('tid', ct.c_uint32),
        ('_thread_comm', ct.c_char_p),
        ('time', ct.c_uint64),
        ('in_kernel', ct.c_uint32),
        ('cpu', ct.c_uint32),
        ('period', ct.c_uint64),
    ]

    @property
    def thread_comm(self):
        """ Thread name, converted from the native char* field. """
        raw_comm = self._thread_comm
        return _char_pt_to_str(raw_comm)
77
78
class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32)]

    # struct format characters of signed integers, keyed by element size in bytes.
    # The upper case form of each character is the unsigned variant.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self):
        """ Name of the field, converted from the native char* field. """
        return _char_pt_to_str(self._name)

    def parse_value(self, data):
        """ Parse value of a field in a tracepoint event.
            data: the tracing data of a sample. It is only sliced, so a ctypes char pointer
                  or a byte string both work.
            The return value depends on the type of the field, and can be an int value, a string,
            an array of int values, etc. If the type can't be parsed, return a byte array or an
            array of byte arrays.
        """
        start = self.offset
        if self.elem_count > 1 and self.elem_size == 1 and self.is_signed == 0:
            # The field is a string stored in an array of unsigned chars. Look for the
            # terminating '\x00' in the raw bytes and decode the text in one go: a single
            # byte of a multi-byte character can't be decoded on its own.
            raw = data[start : start + self.elem_count]
            return bytes_to_str(raw.split(b'\x00', 1)[0])
        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            # struct.unpack always returns a tuple, even for a single element.
            value = struct.unpack('%d%s' % (self.elem_count, unpack_key),
                                  data[start : start + self.elem_count * self.elem_size])
        else:
            # Since we don't know the element type, just return the bytes of each element.
            value = [data[start + i * self.elem_size : start + (i + 1) * self.elem_size]
                     for i in range(self.elem_count)]
        if self.elem_count == 1:
            value = value[0]
        return value
127
128
class TracingDataFormatStruct(ct.Structure):
    """Describes how the tracing data of a tracepoint event is laid out, see
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: total size of all fields in the tracing data.
       field_count: the number of fields.
       fields: an array of fields (TracingFieldFormatStruct).
    """
    _fields_ = [
        ('size', ct.c_uint32),
        ('field_count', ct.c_uint32),
        ('fields', ct.POINTER(TracingFieldFormatStruct)),
    ]
139
140
class EventStruct(ct.Structure):
    """The event type a sample belongs to.
       name: name of the event type.
       tracing_data_format: only available when it is a tracepoint event.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('tracing_data_format', TracingDataFormatStruct),
    ]

    @property
    def name(self):
        """ Name of the event type, converted from the native char* field. """
        raw_name = self._name
        return _char_pt_to_str(raw_name)
152
153
class MappingStruct(ct.Structure):
    """ One mapping area of a monitored thread, like a line of /proc/<pid>/maps.
        start: start addr in memory.
        end: end addr in memory.
        pgoff: offset in the mapped shared library.
    """
    _fields_ = [
        ('start', ct.c_uint64),
        ('end', ct.c_uint64),
        ('pgoff', ct.c_uint64),
    ]
163
164
class SymbolStruct(ct.Structure):
    """ Symbol info for the instruction hit by a sample, or by one callchain entry of a sample.
        dso_name: path of the shared library containing the instruction.
        vaddr_in_file: virtual address of the instruction in the shared library.
        symbol_name: name of the function containing the instruction.
        symbol_addr: start addr of the function containing the instruction.
        symbol_len: length of the function in the shared library.
        mapping: the mapping area hit by the instruction.
    """
    _fields_ = [
        ('_dso_name', ct.c_char_p),
        ('vaddr_in_file', ct.c_uint64),
        ('_symbol_name', ct.c_char_p),
        ('symbol_addr', ct.c_uint64),
        ('symbol_len', ct.c_uint64),
        ('mapping', ct.POINTER(MappingStruct)),
    ]

    @property
    def dso_name(self):
        """ Shared library path, converted from the native char* field. """
        raw_dso_name = self._dso_name
        return _char_pt_to_str(raw_dso_name)

    @property
    def symbol_name(self):
        """ Function name, converted from the native char* field. """
        raw_symbol_name = self._symbol_name
        return _char_pt_to_str(raw_symbol_name)
188
189
class CallChainEntryStructure(ct.Structure):
    """ One entry in the callchain of a sample.
        ip: the address of the instruction of the callchain entry.
        symbol: symbol info of the callchain entry.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('symbol', SymbolStruct),
    ]
197
198
class CallChainStructure(ct.Structure):
    """ The callchain of a sample.
        nr: number of entries in the callchain.
        entries: a pointer to an array of CallChainEntryStructure.

        Example: a sample is generated while a thread runs function C, reached through
        function A -> function B -> function C.
        Then nr = 2, and entries = [function B, function A].
    """
    _fields_ = [
        ('nr', ct.c_uint32),
        ('entries', ct.POINTER(CallChainEntryStructure)),
    ]
210
211
class FeatureSectionStructure(ct.Structure):
    """ One feature section of perf.data, holding information like record cmd, device arch, etc.
        data: a pointer to a buffer storing the section data.
        data_size: data size in bytes.
    """
    _fields_ = [
        ('data', ct.POINTER(ct.c_char)),
        ('data_size', ct.c_uint32),
    ]
219
220
class ReportLibStructure(ct.Structure):
    """ Type of the report lib instance created by the native library.
        No fields are declared, so it is only used through pointers (see
        CreateReportLib's restype in ReportLib.__init__).
    """
    _fields_ = []
223
224
225# pylint: disable=invalid-name
class ReportLib(object):
    """ Python wrapper of one report lib instance created by libsimpleperf_report.so.
        Most methods forward to the native function of the same name, passing the native
        instance as the first argument. Close() destroys the native instance; afterwards,
        every method that needs the instance raises an Exception (see getInstance()).
    """

    def __init__(self, native_lib_path=None):
        """ Load the native library and create a native report lib instance.
            native_lib_path: path of the native library. If None, the path returned by
                             _get_native_lib() is used.
        """
        if native_lib_path is None:
            native_lib_path = _get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        # Look up the native functions once. A restype is set for each function returning
        # a pointer or a string; the others keep the ctypes default return type.
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Results cached by MetaInfo(), GetNextSample() and GetRecordCmd().
        self.meta_info = None
        self.current_sample = None
        self.record_cmd = None

    def _load_dependent_lib(self):
        """ Load libraries needed by the native library before loading it. """
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    @staticmethod
    def _read_uint32(void_p):
        """ Return the uint32 value stored at the address in void_p (a ct.c_void_p), and
            advance void_p past it.
        """
        value = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
        void_p.value += 4
        return value

    def Close(self):
        """ Destroy the native instance. Calling it again has no effect. """
        if self._instance is None:
            return
        self._DestroyReportLibFunc(self._instance)
        self._instance = None

    def SetLogSeverity(self, log_level='info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal.
            Raise RuntimeError if the native call fails.
        """
        cond = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir):
        """ Set directory used to find symbols. Raise RuntimeError if the native call fails."""
        cond = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file):
        """ Set the path of record file, like perf.data.
            Raise RuntimeError if the native call fails.
        """
        cond = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward to the native ShowIpForUnknownSymbol function. """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show=True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def MergeJavaMethods(self, merge=True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def SetKallsymsFile(self, kallsym_file):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding).
            Raise RuntimeError if the native call fails.
        """
        cond = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetNextSample(self):
        """ Read the next sample and remember it as the current sample.
            Return a SampleStruct, or None if the native library returns a null pointer.
        """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self):
        """ Return the value returned by the last GetNextSample() call (None before any call). """
        return self.current_sample

    def GetEventOfCurrentSample(self):
        """ Return the EventStruct given by the native library for the current sample. """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self):
        """ Return the SymbolStruct given by the native library for the current sample. """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self):
        """ Return the CallChainStructure given by the native library for the current sample. """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self):
        """ Return the tracing data of the current sample as an OrderedDict mapping each
            field name to its parsed value (see TracingFieldFormatStruct.parse_value()).
            Return None if the native library gives no tracing data for the sample.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        data_format = self.GetEventOfCurrentSample().tracing_data_format
        result = collections.OrderedDict()
        for i in range(data_format.field_count):
            field = data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path):
        """ Return the build id string given by the native library for path. """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def GetRecordCmd(self):
        """ Return the record command line stored in the 'cmdline' feature section, as one
            string with arguments joined by spaces. Arguments containing a space are wrapped
            in double quotes. Return '' if the section doesn't exist.
            The result is cached after the first successful call.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        args = []
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('cmdline'))
        if not _is_null(feature_data):
            # Section layout as read here: uint32 arg_count, then for each argument a
            # uint32 length followed by that many bytes.
            void_p = ct.cast(feature_data[0].data, ct.c_void_p)
            arg_count = self._read_uint32(void_p)
            for _ in range(arg_count):
                str_len = self._read_uint32(void_p)
                # Drop '\0' bytes, then decode the whole argument at once, so multi-byte
                # characters are not split.
                raw_arg = ct.string_at(void_p.value, str_len).replace(b'\0', b'')
                current_str = bytes_to_str(raw_arg)
                if ' ' in current_str:
                    current_str = '"' + current_str + '"'
                args.append(current_str)
                void_p.value += str_len
        # Cache only after parsing has finished, so a failed parse is not remembered as an
        # empty command line.
        self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name):
        """ Return the string stored in feature section feature_name, or '' if the section
            doesn't exist. The section is read as a uint32 length followed by the string
            bytes; the string ends at the first '\\0' byte within that length.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(feature_data):
            return ''
        void_p = ct.cast(feature_data[0].data, ct.c_void_p)
        str_len = self._read_uint32(void_p)
        raw = ct.string_at(void_p.value, str_len)
        return bytes_to_str(raw.split(b'\0', 1)[0])

    def GetArch(self):
        """ Return the string stored in the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self):
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
            The section data is read as a sequence of NUL-terminated strings, taken in
            pairs of key and value. A trailing string without terminator, or a last key
            without a value, is ignored. The map is cached after the first successful call.
        """
        if self.meta_info is None:
            meta_info = {}
            feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('meta_info'))
            if not _is_null(feature_data):
                section = feature_data[0]
                raw = b''
                if section.data and section.data_size:
                    raw = ct.string_at(section.data, section.data_size)
                # The piece after the last '\0' is not terminated, so it is dropped.
                str_list = [bytes_to_str(s) for s in raw.split(b'\0')[:-1]]
                # zip() stops at the shorter list, which skips a key that has no value.
                for key, value in zip(str_list[0::2], str_list[1::2]):
                    meta_info[key] = value
            self.meta_info = meta_info
        return self.meta_info

    def getInstance(self):
        """ Return the native instance. Raise an Exception if Close() has been called. """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance
427