#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
   Used to access samples in perf.data.

"""

import collections
import ctypes as ct
import struct
from utils import bytes_to_str, get_host_binary_path, is_windows, str_to_bytes


def _get_native_lib():
    """ Return the path of libsimpleperf_report.so as resolved by get_host_binary_path()."""
    return get_host_binary_path('libsimpleperf_report.so')


def _is_null(p):
    """ Return True if p holds a NULL address, False otherwise.
        p: a ctypes pointer, or the value returned by a function whose restype is c_char_p.
    """
    if p:
        return False
    # A falsy value is not necessarily NULL (for example an empty bytes object), so the
    # final answer comes from the raw address.
    return ct.cast(p, ct.c_void_p).value is None


def _char_pt(s):
    """ Convert a python string to the value passed to the native lib as a char pointer."""
    return str_to_bytes(s)


def _char_pt_to_str(char_pt):
    """ Convert a value read from a c_char_p field/result to a python string."""
    return bytes_to_str(char_pt)


def _check(cond, failmsg):
    """ Raise RuntimeError(failmsg) if cond is false."""
    if not cond:
        raise RuntimeError(failmsg)


class SampleStruct(ct.Structure):
    """ Instance of a sample in perf.data.
        ip: the program counter of the thread generating the sample.
        pid: process id (or thread group id) of the thread generating the sample.
        tid: thread id.
        thread_comm: thread name.
        time: time at which the sample was generated. The value is in nanoseconds.
              The clock is decided by the --clockid option in `simpleperf record`.
        in_kernel: whether the instruction is in kernel space or user space.
        cpu: the cpu generating the sample.
        period: count of events that have happened since the last sample. For example, if we
                use -e cpu-cycles, it means how many cpu-cycles have happened.
                If we use -e cpu-clock, it means how many nanoseconds have passed.
    """
    _fields_ = [('ip', ct.c_uint64),
                ('pid', ct.c_uint32),
                ('tid', ct.c_uint32),
                ('_thread_comm', ct.c_char_p),
                ('time', ct.c_uint64),
                ('in_kernel', ct.c_uint32),
                ('cpu', ct.c_uint32),
                ('period', ct.c_uint64)]

    @property
    def thread_comm(self):
        """ Thread name as a python string."""
        return _char_pt_to_str(self._thread_comm)


class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32)]

    # Maps an element size in bytes to the struct format character of the signed type.
    # The unsigned type is the upper-case form of the same character.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self):
        """ Field name as a python string."""
        return _char_pt_to_str(self._name)

    def parse_value(self, data):
        """ Parse value of a field in a tracepoint event.
            data: the tracing data of the sample. Only slicing is used on it, so both a
                  ctypes char pointer and a bytes object are accepted.
            The return value depends on the type of the field, and can be an int value, a string,
            an array of int values, etc. If the type can't be parsed, return a byte array or an
            array of byte arrays.
        """
        if self.elem_count > 1 and self.elem_size == 1 and self.is_signed == 0:
            # The field is a string. It ends at the first NUL byte, or after elem_count bytes
            # if there is none. The bytes are decoded in one call so that a multi-byte
            # character is never handed to bytes_to_str() in pieces.
            raw = data[self.offset:self.offset + self.elem_count]
            return bytes_to_str(raw.split(b'\x00', 1)[0])
        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            value = struct.unpack('%d%s' % (self.elem_count, unpack_key),
                                  data[self.offset:self.offset + self.elem_count * self.elem_size])
        else:
            # Since we don't know the element type, just return the bytes.
            value = []
            offset = self.offset
            for _ in range(self.elem_count):
                value.append(data[offset:offset + self.elem_size])
                offset += self.elem_size
        if self.elem_count == 1:
            # A single element is returned directly instead of in a one-element sequence.
            value = value[0]
        return value


class TracingDataFormatStruct(ct.Structure):
    """Format of tracing data of a tracepoint event, like
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: total size of all fields in the tracing data.
       field_count: the number of fields.
       fields: an array of fields.
    """
    _fields_ = [('size', ct.c_uint32),
                ('field_count', ct.c_uint32),
                ('fields', ct.POINTER(TracingFieldFormatStruct))]


class EventStruct(ct.Structure):
    """Event type of a sample.
       name: name of the event type.
       tracing_data_format: only available when it is a tracepoint event.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('tracing_data_format', TracingDataFormatStruct)]

    @property
    def name(self):
        """ Event type name as a python string."""
        return _char_pt_to_str(self._name)


class MappingStruct(ct.Structure):
    """ A mapping area in the monitored threads, like the content in /proc/<pid>/maps.
        start: start addr in memory.
        end: end addr in memory.
        pgoff: offset in the mapped shared library.
    """
    _fields_ = [('start', ct.c_uint64),
                ('end', ct.c_uint64),
                ('pgoff', ct.c_uint64)]


class SymbolStruct(ct.Structure):
    """ Symbol info of the instruction hit by a sample or a callchain entry of a sample.
        dso_name: path of the shared library containing the instruction.
        vaddr_in_file: virtual address of the instruction in the shared library.
        symbol_name: name of the function containing the instruction.
        symbol_addr: start addr of the function containing the instruction.
        symbol_len: length of the function in the shared library.
        mapping: the mapping area hit by the instruction.
    """
    _fields_ = [('_dso_name', ct.c_char_p),
                ('vaddr_in_file', ct.c_uint64),
                ('_symbol_name', ct.c_char_p),
                ('symbol_addr', ct.c_uint64),
                ('symbol_len', ct.c_uint64),
                ('mapping', ct.POINTER(MappingStruct))]

    @property
    def dso_name(self):
        """ Shared library path as a python string."""
        return _char_pt_to_str(self._dso_name)

    @property
    def symbol_name(self):
        """ Function name as a python string."""
        return _char_pt_to_str(self._symbol_name)


class CallChainEntryStructure(ct.Structure):
    """ A callchain entry of a sample.
        ip: the address of the instruction of the callchain entry.
        symbol: symbol info of the callchain entry.
    """
    _fields_ = [('ip', ct.c_uint64),
                ('symbol', SymbolStruct)]


class CallChainStructure(ct.Structure):
    """ Callchain info of a sample.
        nr: number of entries in the callchain.
        entries: a pointer to an array of CallChainEntryStructure.

        For example, if a sample is generated when a thread is running function C
        with callchain function A -> function B -> function C.
        Then nr = 2, and entries = [function B, function A].
    """
    _fields_ = [('nr', ct.c_uint32),
                ('entries', ct.POINTER(CallChainEntryStructure))]


class FeatureSectionStructure(ct.Structure):
    """ A feature section in perf.data to store information like record cmd, device arch, etc.
        data: a pointer to a buffer storing the section data.
        data_size: data size in bytes.
    """
    _fields_ = [('data', ct.POINTER(ct.c_char)),
                ('data_size', ct.c_uint32)]


class ReportLibStructure(ct.Structure):
    """ Opaque type of the native report lib instance. It is only used through pointers."""
    _fields_ = []


# pylint: disable=invalid-name
class ReportLib(object):
    """ Python interface to one instance of the native report library.
        Configure it with the Set*/Show*/Merge* methods, then call GetNextSample() repeatedly
        and use the Get*OfCurrentSample() methods to inspect each sample.
        Call Close() to destroy the native instance.
    """

    def __init__(self, native_lib_path=None):
        """ Load the native library and create a native report lib instance.
            native_lib_path: path of libsimpleperf_report.so. If None, the path returned by
                             _get_native_lib() is used.
        """
        if native_lib_path is None:
            native_lib_path = _get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        # Functions returning pointers or strings need an explicit restype. The other
        # functions keep the ctypes default.
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Lazily filled caches, see MetaInfo() and GetRecordCmd().
        self.meta_info = None
        self.current_sample = None
        self.record_cmd = None

    def _load_dependent_lib(self):
        """ Load libraries which must be loaded before the native report lib."""
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """ Destroy the native instance. Calling it again is a no-op.
            After Close(), every method going through getInstance() raises an Exception.
        """
        if self._instance is None:
            return
        self._DestroyReportLibFunc(self._instance)
        self._instance = None

    def SetLogSeverity(self, log_level='info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal.
            Raise RuntimeError if the native lib reports failure.
        """
        cond = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir):
        """ Set directory used to find symbols.
            Raise RuntimeError if the native lib reports failure.
        """
        cond = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file):
        """ Set the path of record file, like perf.data.
            Raise RuntimeError if the native lib reports failure.
        """
        cond = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward the ShowIpForUnknownSymbol request to the native lib."""
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show=True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def MergeJavaMethods(self, merge=True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def SetKallsymsFile(self, kallsym_file):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding).
            Raise RuntimeError if the native lib reports failure.
        """
        cond = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetNextSample(self):
        """ Move to the next sample and return it as a SampleStruct.
            Return None when the native lib returns a NULL sample.
        """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self):
        """ Return the value returned by the last GetNextSample() call (None before it)."""
        return self.current_sample

    def GetEventOfCurrentSample(self):
        """ Return the EventStruct of the current sample."""
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self):
        """ Return the SymbolStruct of the current sample."""
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self):
        """ Return the CallChainStructure of the current sample."""
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self):
        """ Return an OrderedDict mapping field name to parsed field value for the tracing
            data of the current sample, in the order of the event's tracing data format.
            Return None if the native lib returns no tracing data.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        event = self.GetEventOfCurrentSample()
        result = collections.OrderedDict()
        for i in range(event.tracing_data_format.field_count):
            field = event.tracing_data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path):
        """ Return the build id string reported by the native lib for path."""
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def GetRecordCmd(self):
        """ Return the record command line stored in the 'cmdline' feature section, with
            arguments joined by spaces and arguments containing a space wrapped in double
            quotes. Return '' if the section doesn't exist. The result is cached.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        self.record_cmd = ''
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('cmdline'))
        if not _is_null(feature_data):
            # Section layout as read here: a uint32 argument count, then for each argument
            # a uint32 length followed by that many bytes.
            void_p = ct.cast(feature_data[0].data, ct.c_void_p)
            arg_count = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
            void_p.value += 4
            args = []
            for _ in range(arg_count):
                str_len = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
                void_p.value += 4
                char_p = ct.cast(void_p, ct.POINTER(ct.c_char))
                # Drop every NUL byte in the argument, then decode it in one call.
                current_str = bytes_to_str(char_p[0:str_len].replace(b'\0', b''))
                if ' ' in current_str:
                    current_str = '"' + current_str + '"'
                args.append(current_str)
                void_p.value += str_len
            self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name):
        """ Return the string stored in a feature section, or '' if the section doesn't exist.
            The section is read as a uint32 length followed by that many bytes; the string
            ends at the first NUL byte within them.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        result = ''
        if not _is_null(feature_data):
            void_p = ct.cast(feature_data[0].data, ct.c_void_p)
            str_len = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
            void_p.value += 4
            char_p = ct.cast(void_p, ct.POINTER(ct.c_char))
            result = bytes_to_str(char_p[0:str_len].split(b'\0', 1)[0])
        return result

    def GetArch(self):
        """ Return the string stored in the 'arch' feature section."""
        return self._GetFeatureString('arch')

    def MetaInfo(self):
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
            The result is cached. It is an empty dict if the section doesn't exist.
        """
        if self.meta_info is None:
            self.meta_info = {}
            feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('meta_info'))
            if not _is_null(feature_data):
                section = feature_data[0]
                raw = section.data[0:section.data_size]
                # The section is a sequence of NUL-terminated strings. Whatever follows the
                # last NUL isn't terminated and is dropped.
                str_list = [bytes_to_str(item) for item in raw.split(b'\0')[:-1]]
                # Strings alternate between key and value. zip() stops at the shorter list,
                # so a trailing key without a value is ignored instead of raising IndexError.
                for key, value in zip(str_list[0::2], str_list[1::2]):
                    self.meta_info[key] = value
        return self.meta_info

    def getInstance(self):
        """ Return the native instance pointer. Raise Exception if Close() has been called."""
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance