1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
19   Used to access samples in perf.data.
20
21"""
22
23import collections
24import ctypes as ct
25from pathlib import Path
26import struct
27from typing import Any, Dict, List, Optional, Union
28
29from simpleperf_utils import bytes_to_str, get_host_binary_path, is_windows, str_to_bytes
30
31
def _is_null(p: Optional[ct._Pointer]) -> bool:
    """Tell whether `p` is a NULL pointer (or None).

    Any truthy value is reported as non-NULL straight away. A falsy value is cast to
    c_void_p and is NULL only when the resulting address is None, so a value that is
    falsy without being NULL is still reported as non-NULL.
    """
    # `and` short-circuits, so the cast only happens for falsy values.
    return not p and ct.cast(p, ct.c_void_p).value is None
36
37
def _char_pt(s: str) -> bytes:
    """Turn a Python string into the bytes value passed to native functions as a char*."""
    encoded = str_to_bytes(s)
    return encoded
40
41
def _char_pt_to_str(char_pt: ct.c_char_p) -> str:
    """Turn a char* value obtained from the native library into a Python string."""
    decoded = bytes_to_str(char_pt)
    return decoded
44
45
def _check(cond: bool, failmsg: str):
    """Raise RuntimeError(failmsg) unless `cond` is truthy."""
    if cond:
        return
    raise RuntimeError(failmsg)
49
50
class SampleStruct(ct.Structure):
    """ One sample record read from perf.data.
        ip: program counter of the thread at the moment the sample was generated.
        pid: process id (i.e. thread group id) of the sampled thread.
        tid: thread id of the sampled thread.
        thread_comm: name of the sampled thread.
        time: timestamp of the sample in nanoseconds, on the clock selected by the
              --clockid option of `simpleperf record`.
        in_kernel: whether the sampled instruction is in kernel space or user space.
        cpu: the cpu that generated the sample.
        period: number of events that happened since the previous sample. With
                -e cpu-cycles it counts cpu cycles; with -e cpu-clock it counts the
                nanoseconds that have passed.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('pid', ct.c_uint32),
        ('tid', ct.c_uint32),
        ('_thread_comm', ct.c_char_p),
        ('time', ct.c_uint64),
        ('in_kernel', ct.c_uint32),
        ('cpu', ct.c_uint32),
        ('period', ct.c_uint64),
    ]

    @property
    def thread_comm(self) -> str:
        """Thread name, converted from the raw `_thread_comm` char pointer."""
        raw_comm = self._thread_comm
        return _char_pt_to_str(raw_comm)
77
78
class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
       is_dynamic: whether the element is a dynamic string.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32),
                ('is_dynamic', ct.c_uint32)]

    # struct format characters of the signed integer types, keyed by element size in bytes.
    # The upper-case form of each character is the matching unsigned type.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self) -> str:
        return _char_pt_to_str(self._name)

    @staticmethod
    def _c_string_bytes(data: Union[bytes, ct._Pointer], start: int, max_len: int) -> bytes:
        """ Return the bytes of the string stored at data[start:], without its terminator.
            The string ends at the first NUL byte, after max_len bytes, or at the end of
            `data` when `data` is a bytes object, whichever comes first.
        """
        length = 0
        # Compare raw bytes instead of decoding every byte on its own: one byte of a
        # multi-byte character can't be decoded in isolation. One-byte slices are used
        # because they yield bytes for both bytes objects and POINTER(c_char) values.
        while (length < max_len and
               data[start + length:start + length + 1] not in (b'\x00', b'')):
            length += 1
        return data[start:start + length]

    def parse_value(
            self, data: Union[bytes, ct._Pointer]) -> Union[str, int, bytes, tuple, List[bytes]]:
        """ Parse value of a field in a tracepoint event.
            data: the tracing data of the event. Its slices must yield bytes, which holds
                  for a bytes object and for the POINTER(c_char) passed in by
                  ReportLib.GetTracingDataOfCurrentSample.
            The return value depends on the type of the field:
              - a dynamic field, or a multi-element field with 1-byte elements, is returned
                as a string;
              - a field with 1, 2, 4 or 8 byte elements is returned as an int, or as a tuple
                of ints when elem_count != 1;
              - any other field is returned as a bytes object, or as a list of bytes objects
                when elem_count != 1.
        """
        if self.is_dynamic:
            # The field itself holds two little-endian uint16 values: where the string
            # starts inside data, and the maximum number of bytes it may occupy.
            offset, max_len = struct.unpack('<HH', data[self.offset:self.offset + 4])
            return bytes_to_str(self._c_string_bytes(data, offset, max_len))

        if self.elem_count > 1 and self.elem_size == 1:
            # Probably the field is a string.
            # Don't use self.is_signed, which has different values on x86 and arm.
            return bytes_to_str(self._c_string_bytes(data, self.offset, self.elem_count))

        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            value = struct.unpack(f'{self.elem_count}{unpack_key}',
                                  data[self.offset:self.offset + self.elem_count * self.elem_size])
        else:
            # Since we don't know the element type, just return the bytes.
            value = [data[self.offset + i * self.elem_size:
                          self.offset + (i + 1) * self.elem_size]
                     for i in range(self.elem_count)]
        if self.elem_count == 1:
            value = value[0]
        return value
137
138
class TracingDataFormatStruct(ct.Structure):
    """Layout of the tracing data attached to a tracepoint event, as described in
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: combined size of all the fields in the tracing data.
       field_count: how many fields there are.
       fields: pointer to an array of TracingFieldFormatStruct, one entry per field.
    """
    _fields_ = [
        ('size', ct.c_uint32),
        ('field_count', ct.c_uint32),
        ('fields', ct.POINTER(TracingFieldFormatStruct)),
    ]
149
150
class EventStruct(ct.Structure):
    """The event type a sample belongs to.
       name: the event type's name.
       tracing_data_format: layout of the tracing data; only available for tracepoint events.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('tracing_data_format', TracingDataFormatStruct),
    ]

    @property
    def name(self) -> str:
        """Event type name, converted from the raw `_name` char pointer."""
        raw_name = self._name
        return _char_pt_to_str(raw_name)
162
163
class MappingStruct(ct.Structure):
    """ One mapped memory area of a monitored thread, comparable to a line of
        /proc/<pid>/maps.
        start: address in memory where the area starts.
        end: address in memory where the area ends.
        pgoff: offset of the area inside the mapped shared library.
    """
    _fields_ = [
        ('start', ct.c_uint64),
        ('end', ct.c_uint64),
        ('pgoff', ct.c_uint64),
    ]
173
174
class SymbolStruct(ct.Structure):
    """ Symbol information for the instruction hit by a sample, or by one entry of a
        sample's callchain.
        dso_name: path of the shared library that contains the instruction.
        vaddr_in_file: virtual address of the instruction inside the shared library.
        symbol_name: name of the function that contains the instruction.
        symbol_addr: start address of that function.
        symbol_len: length of that function in the shared library.
        mapping: pointer to the mapping area hit by the instruction.
    """
    _fields_ = [
        ('_dso_name', ct.c_char_p),
        ('vaddr_in_file', ct.c_uint64),
        ('_symbol_name', ct.c_char_p),
        ('symbol_addr', ct.c_uint64),
        ('symbol_len', ct.c_uint64),
        ('mapping', ct.POINTER(MappingStruct)),
    ]

    @property
    def dso_name(self) -> str:
        """Shared library path, converted from the raw `_dso_name` char pointer."""
        raw_dso_name = self._dso_name
        return _char_pt_to_str(raw_dso_name)

    @property
    def symbol_name(self) -> str:
        """Function name, converted from the raw `_symbol_name` char pointer."""
        raw_symbol_name = self._symbol_name
        return _char_pt_to_str(raw_symbol_name)
198
199
class CallChainEntryStructure(ct.Structure):
    """ One entry of a sample's callchain.
        ip: address of the instruction this entry refers to.
        symbol: symbol information for that instruction.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('symbol', SymbolStruct),
    ]
207
208
class CallChainStructure(ct.Structure):
    """ The callchain of a sample.
        nr: how many entries the callchain has.
        entries: pointer to an array of CallChainEntryStructure.

        Example: a sample is taken while a thread runs function C, reached through
        function A -> function B -> function C.
        Then nr = 2, and entries = [function B, function A].
    """
    _fields_ = [
        ('nr', ct.c_uint32),
        ('entries', ct.POINTER(CallChainEntryStructure)),
    ]
220
221
class FeatureSectionStructure(ct.Structure):
    """ A feature section of perf.data, which stores information such as the record
        command or the device arch.
        data: pointer to the buffer that holds the section data.
        data_size: size of that data in bytes.
    """
    _fields_ = [
        ('data', ct.POINTER(ct.c_char)),
        ('data_size', ct.c_uint32),
    ]
229
230
class ReportLibStructure(ct.Structure):
    """ Handle type of a native report lib instance.
        It declares no fields, so no member can be read or written from Python; ReportLib
        only uses pointers to it, as the result type of CreateReportLib.
    """
    _fields_ = []
233
234
235# pylint: disable=invalid-name
class ReportLib(object):
    """ Python interface to one native report lib instance of libsimpleperf_report.so.

        The instance is created in __init__ and destroyed by Close(). The object can also be
        used as a context manager, which calls Close() when the `with` block is left.
        Every method that talks to the native library raises Exception('Instance is Closed')
        once Close() has been called.
    """

    def __init__(self, native_lib_path: Optional[str] = None):
        """ Load the native library, bind its functions and create a report lib instance.
            native_lib_path: path of the shared library to load. When None, the path
                             returned by _get_native_lib() is used.
        """
        if native_lib_path is None:
            native_lib_path = self._get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        # The results of the four setters below are consumed as booleans, so they get the
        # same c_bool restype as AddProguardMappingFile instead of ctypes' default c_int.
        # NOTE(review): assumes the native functions are declared to return bool - confirm
        # against the native header.
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetLogSeverityFunc.restype = ct.c_bool
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetSymfsFunc.restype = ct.c_bool
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetRecordFileFunc.restype = ct.c_bool
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._SetKallsymsFileFunc.restype = ct.c_bool
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._AddProguardMappingFileFunc = self._lib.AddProguardMappingFile
        self._AddProguardMappingFileFunc.restype = ct.c_bool
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Caches filled lazily by MetaInfo() and GetRecordCmd().
        self.meta_info: Optional[Dict[str, str]] = None
        # The sample returned by the latest GetNextSample() call.
        self.current_sample: Optional[SampleStruct] = None
        self.record_cmd: Optional[str] = None

    def __enter__(self) -> 'ReportLib':
        """ Allow `with ReportLib() as lib:`; the instance itself is the context value. """
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """ Destroy the native instance when the `with` block is left. """
        self.Close()

    def _get_native_lib(self) -> str:
        """ Return the default path of libsimpleperf_report.so for the host. """
        return get_host_binary_path('libsimpleperf_report.so')

    def _load_dependent_lib(self):
        """ Load the libraries the native library depends on, where that is needed. """
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """ Destroy the native instance. Calling it again afterwards does nothing. """
        if self._instance:
            self._DestroyReportLibFunc(self._instance)
            self._instance = None

    def SetLogSeverity(self, log_level: str = 'info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal.
            Raises RuntimeError if the native call reports failure.
        """
        cond: bool = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir: str):
        """ Set directory used to find symbols.
            Raises RuntimeError if the native call reports failure.
        """
        cond: bool = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file: str):
        """ Set the path of record file, like perf.data.
            Raises RuntimeError if the native call reports failure.
        """
        cond: bool = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward to the native ShowIpForUnknownSymbol function.
            NOTE(review): presumably makes unknown symbols show their ip - confirm against
            the native library.
        """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show: bool = True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def MergeJavaMethods(self, merge: bool = True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
        """ Add proguard mapping.txt to de-obfuscate method names.
            Raises ValueError if the native call reports failure.
        """
        if not self._AddProguardMappingFileFunc(self.getInstance(), _char_pt(str(mapping_file))):
            raise ValueError(f'failed to add proguard mapping file: {mapping_file}')

    def SetKallsymsFile(self, kallsym_file: str):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding).
            Raises RuntimeError if the native call reports failure.
        """
        cond: bool = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetNextSample(self) -> Optional[SampleStruct]:
        """ Return the next sample. If no more samples, return None.
            The result is also remembered as the current sample.
        """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self) -> Optional[SampleStruct]:
        """ Return the sample produced by the latest GetNextSample() call, if any. """
        return self.current_sample

    def GetEventOfCurrentSample(self) -> EventStruct:
        """ Return the event type of the current sample. """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self) -> SymbolStruct:
        """ Return the symbol info of the current sample. """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self) -> CallChainStructure:
        """ Return the callchain of the current sample. """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self) -> Optional[Dict[str, Any]]:
        """ Return the tracing data of the current sample as an ordered map from field name
            to parsed field value, or None if the native library returns no tracing data.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        event = self.GetEventOfCurrentSample()
        result = collections.OrderedDict()
        for i in range(event.tracing_data_format.field_count):
            field = event.tracing_data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path: str) -> str:
        """ Return the build id the native library reports for `path`. """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def _GetFeatureSectionBytes(self, feature_name: str) -> Optional[bytes]:
        """ Copy the data of the named feature section into a bytes object.
            Returns None when the native library has no such section, and b'' when the
            section has a NULL data pointer.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(feature_data):
            return None
        section = feature_data[0]
        # Slicing a NULL pointer is not safe, so test the pointer before slicing it.
        if not section.data:
            return b''
        # One bulk copy limited by data_size; all parsing then happens on bounded bytes.
        return section.data[0:section.data_size]

    @staticmethod
    def _ParseCmdlineSection(raw: bytes) -> List[bytes]:
        """ Split the data of a 'cmdline' section into arguments, with NUL bytes removed.
            Layout read here: a uint32 argument count, then for every argument a uint32
            byte length followed by that many bytes. Integers are in native byte order.
            Parsing stops early if `raw` ends before the announced data does.
        """
        uint32 = struct.Struct('=I')
        if len(raw) < uint32.size:
            return []
        (arg_count,) = uint32.unpack_from(raw, 0)
        pos = uint32.size
        args = []
        for _ in range(arg_count):
            if pos + uint32.size > len(raw):
                break  # Truncated section: no room left for another length header.
            (str_len,) = uint32.unpack_from(raw, pos)
            pos += uint32.size
            args.append(raw[pos:pos + str_len].replace(b'\0', b''))
            pos += str_len
        return args

    @staticmethod
    def _ParseStringSection(raw: bytes) -> bytes:
        """ Extract the string of a section made of a uint32 byte length (native byte
            order) followed by the string. The result ends before the first NUL byte.
        """
        uint32 = struct.Struct('=I')
        if len(raw) < uint32.size:
            return b''
        (str_len,) = uint32.unpack_from(raw, 0)
        start = uint32.size
        return raw[start:start + str_len].split(b'\0', 1)[0]

    @staticmethod
    def _ParseStringPairs(raw: bytes) -> Dict[bytes, bytes]:
        """ Turn a sequence of NUL-terminated strings into a map, pairing them up as
            key, value, key, value, ... A trailing key without a value is ignored.
        """
        # split() puts whatever follows the last NUL (possibly b'') into the final
        # element. That piece is not NUL-terminated, so it is not a complete string.
        strs = raw.split(b'\0')[:-1]
        # zip() stops at the shorter slice, which drops a key that has no value.
        return dict(zip(strs[0::2], strs[1::2]))

    def GetRecordCmd(self) -> str:
        """ Return the record command stored in the 'cmdline' feature section, with the
            arguments joined by spaces. Arguments containing a space are wrapped in double
            quotes. Returns '' when there is no such section.
            The result is cached once it has been built successfully.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        args = []
        raw = self._GetFeatureSectionBytes('cmdline')
        if raw is not None:
            for raw_arg in self._ParseCmdlineSection(raw):
                # Decode a whole argument at once, so multi-byte characters stay intact.
                current_str = bytes_to_str(raw_arg)
                if ' ' in current_str:
                    current_str = '"' + current_str + '"'
                args.append(current_str)
        self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name: str) -> str:
        """ Return the string stored in the named feature section, or '' when there is no
            such section.
        """
        raw = self._GetFeatureSectionBytes(feature_name)
        if raw is None:
            return ''
        return bytes_to_str(self._ParseStringSection(raw))

    def GetArch(self) -> str:
        """ Return the string stored in the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self) -> Dict[str, str]:
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
            The map is cached once it has been built successfully.
        """
        if self.meta_info is None:
            raw = self._GetFeatureSectionBytes('meta_info')
            pairs = self._ParseStringPairs(raw) if raw is not None else {}
            self.meta_info = {bytes_to_str(key): bytes_to_str(value)
                              for key, value in pairs.items()}
        return self.meta_info

    def getInstance(self) -> ct._Pointer:
        """ Return the native instance. Raises Exception if Close() has been called. """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance
447