1#!/usr/bin/python 2''' 3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 4to be extended with Python code e.g. for library-specific data visualizations, 5such as for the C++ STL types. Documentation on this API can be seen at: 6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 7 8 9This python module deals with the case when the process being debugged (the 10"inferior process" in gdb parlance) is itself python, or more specifically, 11linked against libpython. In this situation, almost every item of data is a 12(PyObject*), and having the debugger merely print their addresses is not very 13enlightening. 14 15This module embeds knowledge about the implementation details of libpython so 16that we can emit useful visualizations e.g. a string, a list, a dict, a frame 17giving file/line information and the state of local variables 18 19In particular, given a gdb.Value corresponding to a PyObject* in the inferior 20process, we can generate a "proxy value" within the gdb process. For example, 21given a PyObject* in the inferior process that is in fact a PyListObject* 22holding three PyObject* that turn out to be PyBytesObject* instances, we can 23generate a proxy value within the gdb process that is a list of bytes 24instances: 25 [b"foo", b"bar", b"baz"] 26 27Doing so can be expensive for complicated graphs of objects, and could take 28some time, so we also have a "write_repr" method that writes a representation 29of the data to a file-like object. This allows us to stop the traversal by 30having the file-like object raise an exception if it gets too much data. 31 32With both "proxyval" and "write_repr" we keep track of the set of all addresses 33visited so far in the traversal, to avoid infinite recursion due to cycles in 34the graph of object references. 35 36We try to defer gdb.lookup_type() invocations for python types until as late as 37possible: for a dynamically linked python binary, when the process starts in 38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 39the type names are known to the debugger 40 41The module also extends gdb with some python-specific commands. 42''' 43 44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax 45# compatible (2.6+ and 3.0+). See #19308. 46 47from __future__ import print_function 48import gdb 49import os 50import locale 51import sys 52 53if sys.version_info[0] >= 3: 54 unichr = chr 55 xrange = range 56 long = int 57 58# Look up the gdb.Type for some standard types: 59# Those need to be refreshed as types (pointer sizes) may change when 60# gdb loads different executables 61 62def _type_char_ptr(): 63 return gdb.lookup_type('char').pointer() # char* 64 65 66def _type_unsigned_char_ptr(): 67 return gdb.lookup_type('unsigned char').pointer() # unsigned char* 68 69 70def _type_unsigned_short_ptr(): 71 return gdb.lookup_type('unsigned short').pointer() 72 73 74def _type_unsigned_int_ptr(): 75 return gdb.lookup_type('unsigned int').pointer() 76 77 78def _sizeof_void_p(): 79 return gdb.lookup_type('void').pointer().sizeof 80 81 82# value computed later, see PyUnicodeObjectPtr.proxy() 83_is_pep393 = None 84 85Py_TPFLAGS_HEAPTYPE = (1 << 9) 86Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) 87Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) 88Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) 89Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) 90Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) 91Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) 92Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) 93Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) 94 95 96MAX_OUTPUT_LEN=1024 97 98hexdigits = "0123456789abcdef" 99 100ENCODING = locale.getpreferredencoding() 101 102EVALFRAME = '_PyEval_EvalFrameDefault' 103 104class NullPyObjectPtr(RuntimeError): 105 pass 106 107 108def safety_limit(val): 109 # Given an integer value from the process being debugged, limit it to some 110 # safety threshold so that arbitrary breakage within said process doesn't 111 # break the gdb process too much (e.g. sizes of iterations, sizes of lists) 112 return min(val, 1000) 113 114 115def safe_range(val): 116 # As per range, but don't trust the value too much: cap it to a safety 117 # threshold in case the data was corrupted 118 return xrange(safety_limit(int(val))) 119 120if sys.version_info[0] >= 3: 121 def write_unicode(file, text): 122 file.write(text) 123else: 124 def write_unicode(file, text): 125 # Write a byte or unicode string to file. Unicode strings are encoded to 126 # ENCODING encoding with 'backslashreplace' error handler to avoid 127 # UnicodeEncodeError. 128 if isinstance(text, unicode): 129 text = text.encode(ENCODING, 'backslashreplace') 130 file.write(text) 131 132try: 133 os_fsencode = os.fsencode 134except AttributeError: 135 def os_fsencode(filename): 136 if not isinstance(filename, unicode): 137 return filename 138 encoding = sys.getfilesystemencoding() 139 if encoding == 'mbcs': 140 # mbcs doesn't support surrogateescape 141 return filename.encode(encoding) 142 encoded = [] 143 for char in filename: 144 # surrogateescape error handler 145 if 0xDC80 <= ord(char) <= 0xDCFF: 146 byte = chr(ord(char) - 0xDC00) 147 else: 148 byte = char.encode(encoding) 149 encoded.append(byte) 150 return ''.join(encoded) 151 152class StringTruncated(RuntimeError): 153 pass 154 155class TruncatedStringIO(object): 156 '''Similar to io.StringIO, but can truncate the output by raising a 157 StringTruncated exception''' 158 def __init__(self, maxlen=None): 159 self._val = '' 160 self.maxlen = maxlen 161 162 def write(self, data): 163 if self.maxlen: 164 if len(data) + len(self._val) > self.maxlen: 165 # Truncation: 166 self._val += data[0:self.maxlen - len(self._val)] 167 raise StringTruncated() 168 169 self._val += data 170 171 def getvalue(self): 172 return self._val 173 174class PyObjectPtr(object): 175 """ 176 Class wrapping a gdb.Value that's either a (PyObject*) within the 177 inferior process, or some subclass pointer e.g. (PyBytesObject*) 178 179 There will be a subclass for every refined PyObject type that we care 180 about. 181 182 Note that at every stage the underlying pointer could be NULL, point 183 to corrupt data, etc; this is the debugger, after all. 184 """ 185 _typename = 'PyObject' 186 187 def __init__(self, gdbval, cast_to=None): 188 if cast_to: 189 self._gdbval = gdbval.cast(cast_to) 190 else: 191 self._gdbval = gdbval 192 193 def field(self, name): 194 ''' 195 Get the gdb.Value for the given field within the PyObject, coping with 196 some python 2 versus python 3 differences. 197 198 Various libpython types are defined using the "PyObject_HEAD" and 199 "PyObject_VAR_HEAD" macros. 200 201 In Python 2, this these are defined so that "ob_type" and (for a var 202 object) "ob_size" are fields of the type in question. 203 204 In Python 3, this is defined as an embedded PyVarObject type thus: 205 PyVarObject ob_base; 206 so that the "ob_size" field is located insize the "ob_base" field, and 207 the "ob_type" is most easily accessed by casting back to a (PyObject*). 208 ''' 209 if self.is_null(): 210 raise NullPyObjectPtr(self) 211 212 if name == 'ob_type': 213 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 214 return pyo_ptr.dereference()[name] 215 216 if name == 'ob_size': 217 pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) 218 return pyo_ptr.dereference()[name] 219 220 # General case: look it up inside the object: 221 return self._gdbval.dereference()[name] 222 223 def pyop_field(self, name): 224 ''' 225 Get a PyObjectPtr for the given PyObject* field within this PyObject, 226 coping with some python 2 versus python 3 differences. 227 ''' 228 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 229 230 def write_field_repr(self, name, out, visited): 231 ''' 232 Extract the PyObject* field named "name", and write its representation 233 to file-like object "out" 234 ''' 235 field_obj = self.pyop_field(name) 236 field_obj.write_repr(out, visited) 237 238 def get_truncated_repr(self, maxlen): 239 ''' 240 Get a repr-like string for the data, but truncate it at "maxlen" bytes 241 (ending the object graph traversal as soon as you do) 242 ''' 243 out = TruncatedStringIO(maxlen) 244 try: 245 self.write_repr(out, set()) 246 except StringTruncated: 247 # Truncation occurred: 248 return out.getvalue() + '...(truncated)' 249 250 # No truncation occurred: 251 return out.getvalue() 252 253 def type(self): 254 return PyTypeObjectPtr(self.field('ob_type')) 255 256 def is_null(self): 257 return 0 == long(self._gdbval) 258 259 def is_optimized_out(self): 260 ''' 261 Is the value of the underlying PyObject* visible to the debugger? 262 263 This can vary with the precise version of the compiler used to build 264 Python, and the precise version of gdb. 265 266 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 267 PyEval_EvalFrameEx's "f" 268 ''' 269 return self._gdbval.is_optimized_out 270 271 def safe_tp_name(self): 272 try: 273 ob_type = self.type() 274 tp_name = ob_type.field('tp_name') 275 return tp_name.string() 276 # NullPyObjectPtr: NULL tp_name? 277 # RuntimeError: Can't even read the object at all? 278 # UnicodeDecodeError: Failed to decode tp_name bytestring 279 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 280 return 'unknown' 281 282 def proxyval(self, visited): 283 ''' 284 Scrape a value from the inferior process, and try to represent it 285 within the gdb process, whilst (hopefully) avoiding crashes when 286 the remote data is corrupt. 287 288 Derived classes will override this. 289 290 For example, a PyIntObject* with ob_ival 42 in the inferior process 291 should result in an int(42) in this process. 292 293 visited: a set of all gdb.Value pyobject pointers already visited 294 whilst generating this value (to guard against infinite recursion when 295 visiting object graphs with loops). Analogous to Py_ReprEnter and 296 Py_ReprLeave 297 ''' 298 299 class FakeRepr(object): 300 """ 301 Class representing a non-descript PyObject* value in the inferior 302 process for when we don't have a custom scraper, intended to have 303 a sane repr(). 304 """ 305 306 def __init__(self, tp_name, address): 307 self.tp_name = tp_name 308 self.address = address 309 310 def __repr__(self): 311 # For the NULL pointer, we have no way of knowing a type, so 312 # special-case it as per 313 # http://bugs.python.org/issue8032#msg100882 314 if self.address == 0: 315 return '0x0' 316 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 317 318 return FakeRepr(self.safe_tp_name(), 319 long(self._gdbval)) 320 321 def write_repr(self, out, visited): 322 ''' 323 Write a string representation of the value scraped from the inferior 324 process to "out", a file-like object. 325 ''' 326 # Default implementation: generate a proxy value and write its repr 327 # However, this could involve a lot of work for complicated objects, 328 # so for derived classes we specialize this 329 return out.write(repr(self.proxyval(visited))) 330 331 @classmethod 332 def subclass_from_type(cls, t): 333 ''' 334 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 335 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 336 to use 337 338 Ideally, we would look up the symbols for the global types, but that 339 isn't working yet: 340 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 341 Traceback (most recent call last): 342 File "<string>", line 1, in <module> 343 NotImplementedError: Symbol type not yet supported in Python scripts. 344 Error while executing Python code. 345 346 For now, we use tp_flags, after doing some string comparisons on the 347 tp_name for some special-cases that don't seem to be visible through 348 flags 349 ''' 350 try: 351 tp_name = t.field('tp_name').string() 352 tp_flags = int(t.field('tp_flags')) 353 # RuntimeError: NULL pointers 354 # UnicodeDecodeError: string() fails to decode the bytestring 355 except (RuntimeError, UnicodeDecodeError): 356 # Handle any kind of error e.g. NULL ptrs by simply using the base 357 # class 358 return cls 359 360 #print('tp_flags = 0x%08x' % tp_flags) 361 #print('tp_name = %r' % tp_name) 362 363 name_map = {'bool': PyBoolObjectPtr, 364 'classobj': PyClassObjectPtr, 365 'NoneType': PyNoneStructPtr, 366 'frame': PyFrameObjectPtr, 367 'set' : PySetObjectPtr, 368 'frozenset' : PySetObjectPtr, 369 'builtin_function_or_method' : PyCFunctionObjectPtr, 370 'method-wrapper': wrapperobject, 371 } 372 if tp_name in name_map: 373 return name_map[tp_name] 374 375 if tp_flags & Py_TPFLAGS_HEAPTYPE: 376 return HeapTypeObjectPtr 377 378 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 379 return PyLongObjectPtr 380 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 381 return PyListObjectPtr 382 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 383 return PyTupleObjectPtr 384 if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: 385 return PyBytesObjectPtr 386 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 387 return PyUnicodeObjectPtr 388 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 389 return PyDictObjectPtr 390 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 391 return PyBaseExceptionObjectPtr 392 #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: 393 # return PyTypeObjectPtr 394 395 # Use the base class: 396 return cls 397 398 @classmethod 399 def from_pyobject_ptr(cls, gdbval): 400 ''' 401 Try to locate the appropriate derived class dynamically, and cast 402 the pointer accordingly. 403 ''' 404 try: 405 p = PyObjectPtr(gdbval) 406 cls = cls.subclass_from_type(p.type()) 407 return cls(gdbval, cast_to=cls.get_gdb_type()) 408 except RuntimeError: 409 # Handle any kind of error e.g. NULL ptrs by simply using the base 410 # class 411 pass 412 return cls(gdbval) 413 414 @classmethod 415 def get_gdb_type(cls): 416 return gdb.lookup_type(cls._typename).pointer() 417 418 def as_address(self): 419 return long(self._gdbval) 420 421class PyVarObjectPtr(PyObjectPtr): 422 _typename = 'PyVarObject' 423 424class ProxyAlreadyVisited(object): 425 ''' 426 Placeholder proxy to use when protecting against infinite recursion due to 427 loops in the object graph. 428 429 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 430 ''' 431 def __init__(self, rep): 432 self._rep = rep 433 434 def __repr__(self): 435 return self._rep 436 437 438def _write_instance_repr(out, visited, name, pyop_attrdict, address): 439 '''Shared code for use by all classes: 440 write a representation to file-like object "out"''' 441 out.write('<') 442 out.write(name) 443 444 # Write dictionary of instance attributes: 445 if isinstance(pyop_attrdict, PyDictObjectPtr): 446 out.write('(') 447 first = True 448 for pyop_arg, pyop_val in pyop_attrdict.iteritems(): 449 if not first: 450 out.write(', ') 451 first = False 452 out.write(pyop_arg.proxyval(visited)) 453 out.write('=') 454 pyop_val.write_repr(out, visited) 455 out.write(')') 456 out.write(' at remote 0x%x>' % address) 457 458 459class InstanceProxy(object): 460 461 def __init__(self, cl_name, attrdict, address): 462 self.cl_name = cl_name 463 self.attrdict = attrdict 464 self.address = address 465 466 def __repr__(self): 467 if isinstance(self.attrdict, dict): 468 kwargs = ', '.join(["%s=%r" % (arg, val) 469 for arg, val in self.attrdict.iteritems()]) 470 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 471 kwargs, self.address) 472 else: 473 return '<%s at remote 0x%x>' % (self.cl_name, 474 self.address) 475 476def _PyObject_VAR_SIZE(typeobj, nitems): 477 if _PyObject_VAR_SIZE._type_size_t is None: 478 _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t') 479 480 return ( ( typeobj.field('tp_basicsize') + 481 nitems * typeobj.field('tp_itemsize') + 482 (_sizeof_void_p() - 1) 483 ) & ~(_sizeof_void_p() - 1) 484 ).cast(_PyObject_VAR_SIZE._type_size_t) 485_PyObject_VAR_SIZE._type_size_t = None 486 487class HeapTypeObjectPtr(PyObjectPtr): 488 _typename = 'PyObject' 489 490 def get_attr_dict(self): 491 ''' 492 Get the PyDictObject ptr representing the attribute dictionary 493 (or None if there's a problem) 494 ''' 495 try: 496 typeobj = self.type() 497 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 498 if dictoffset != 0: 499 if dictoffset < 0: 500 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 501 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 502 if tsize < 0: 503 tsize = -tsize 504 size = _PyObject_VAR_SIZE(typeobj, tsize) 505 dictoffset += size 506 assert dictoffset > 0 507 assert dictoffset % _sizeof_void_p() == 0 508 509 dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset 510 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 511 dictptr = dictptr.cast(PyObjectPtrPtr) 512 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 513 except RuntimeError: 514 # Corrupt data somewhere; fail safe 515 pass 516 517 # Not found, or some kind of error: 518 return None 519 520 def proxyval(self, visited): 521 ''' 522 Support for classes. 523 524 Currently we just locate the dictionary using a transliteration to 525 python of _PyObject_GetDictPtr, ignoring descriptors 526 ''' 527 # Guard against infinite loops: 528 if self.as_address() in visited: 529 return ProxyAlreadyVisited('<...>') 530 visited.add(self.as_address()) 531 532 pyop_attr_dict = self.get_attr_dict() 533 if pyop_attr_dict: 534 attr_dict = pyop_attr_dict.proxyval(visited) 535 else: 536 attr_dict = {} 537 tp_name = self.safe_tp_name() 538 539 # Class: 540 return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) 541 542 def write_repr(self, out, visited): 543 # Guard against infinite loops: 544 if self.as_address() in visited: 545 out.write('<...>') 546 return 547 visited.add(self.as_address()) 548 549 pyop_attrdict = self.get_attr_dict() 550 _write_instance_repr(out, visited, 551 self.safe_tp_name(), pyop_attrdict, self.as_address()) 552 553class ProxyException(Exception): 554 def __init__(self, tp_name, args): 555 self.tp_name = tp_name 556 self.args = args 557 558 def __repr__(self): 559 return '%s%r' % (self.tp_name, self.args) 560 561class PyBaseExceptionObjectPtr(PyObjectPtr): 562 """ 563 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 564 within the process being debugged. 565 """ 566 _typename = 'PyBaseExceptionObject' 567 568 def proxyval(self, visited): 569 # Guard against infinite loops: 570 if self.as_address() in visited: 571 return ProxyAlreadyVisited('(...)') 572 visited.add(self.as_address()) 573 arg_proxy = self.pyop_field('args').proxyval(visited) 574 return ProxyException(self.safe_tp_name(), 575 arg_proxy) 576 577 def write_repr(self, out, visited): 578 # Guard against infinite loops: 579 if self.as_address() in visited: 580 out.write('(...)') 581 return 582 visited.add(self.as_address()) 583 584 out.write(self.safe_tp_name()) 585 self.write_field_repr('args', out, visited) 586 587class PyClassObjectPtr(PyObjectPtr): 588 """ 589 Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> 590 instance within the process being debugged. 591 """ 592 _typename = 'PyClassObject' 593 594 595class BuiltInFunctionProxy(object): 596 def __init__(self, ml_name): 597 self.ml_name = ml_name 598 599 def __repr__(self): 600 return "<built-in function %s>" % self.ml_name 601 602class BuiltInMethodProxy(object): 603 def __init__(self, ml_name, pyop_m_self): 604 self.ml_name = ml_name 605 self.pyop_m_self = pyop_m_self 606 607 def __repr__(self): 608 return ('<built-in method %s of %s object at remote 0x%x>' 609 % (self.ml_name, 610 self.pyop_m_self.safe_tp_name(), 611 self.pyop_m_self.as_address()) 612 ) 613 614class PyCFunctionObjectPtr(PyObjectPtr): 615 """ 616 Class wrapping a gdb.Value that's a PyCFunctionObject* 617 (see Include/methodobject.h and Objects/methodobject.c) 618 """ 619 _typename = 'PyCFunctionObject' 620 621 def proxyval(self, visited): 622 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 623 try: 624 ml_name = m_ml['ml_name'].string() 625 except UnicodeDecodeError: 626 ml_name = '<ml_name:UnicodeDecodeError>' 627 628 pyop_m_self = self.pyop_field('m_self') 629 if pyop_m_self.is_null(): 630 return BuiltInFunctionProxy(ml_name) 631 else: 632 return BuiltInMethodProxy(ml_name, pyop_m_self) 633 634 635class PyCodeObjectPtr(PyObjectPtr): 636 """ 637 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 638 within the process being debugged. 639 """ 640 _typename = 'PyCodeObject' 641 642 def addr2line(self, addrq): 643 ''' 644 Get the line number for a given bytecode offset 645 646 Analogous to PyCode_Addr2Line; translated from pseudocode in 647 Objects/lnotab_notes.txt 648 ''' 649 co_lnotab = self.pyop_field('co_lnotab').proxyval(set()) 650 651 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 652 # not 0, as lnotab_notes.txt has it: 653 lineno = int_from_int(self.field('co_firstlineno')) 654 655 addr = 0 656 for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]): 657 addr += ord(addr_incr) 658 if addr > addrq: 659 return lineno 660 lineno += ord(line_incr) 661 return lineno 662 663 664class PyDictObjectPtr(PyObjectPtr): 665 """ 666 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 667 within the process being debugged. 668 """ 669 _typename = 'PyDictObject' 670 671 def iteritems(self): 672 ''' 673 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 674 analogous to dict.iteritems() 675 ''' 676 keys = self.field('ma_keys') 677 values = self.field('ma_values') 678 entries, nentries = self._get_entries(keys) 679 for i in safe_range(nentries): 680 ep = entries[i] 681 if long(values): 682 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) 683 else: 684 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 685 if not pyop_value.is_null(): 686 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 687 yield (pyop_key, pyop_value) 688 689 def proxyval(self, visited): 690 # Guard against infinite loops: 691 if self.as_address() in visited: 692 return ProxyAlreadyVisited('{...}') 693 visited.add(self.as_address()) 694 695 result = {} 696 for pyop_key, pyop_value in self.iteritems(): 697 proxy_key = pyop_key.proxyval(visited) 698 proxy_value = pyop_value.proxyval(visited) 699 result[proxy_key] = proxy_value 700 return result 701 702 def write_repr(self, out, visited): 703 # Guard against infinite loops: 704 if self.as_address() in visited: 705 out.write('{...}') 706 return 707 visited.add(self.as_address()) 708 709 out.write('{') 710 first = True 711 for pyop_key, pyop_value in self.iteritems(): 712 if not first: 713 out.write(', ') 714 first = False 715 pyop_key.write_repr(out, visited) 716 out.write(': ') 717 pyop_value.write_repr(out, visited) 718 out.write('}') 719 720 def _get_entries(self, keys): 721 dk_nentries = int(keys['dk_nentries']) 722 dk_size = int(keys['dk_size']) 723 try: 724 # <= Python 3.5 725 return keys['dk_entries'], dk_size 726 except RuntimeError: 727 # >= Python 3.6 728 pass 729 730 if dk_size <= 0xFF: 731 offset = dk_size 732 elif dk_size <= 0xFFFF: 733 offset = 2 * dk_size 734 elif dk_size <= 0xFFFFFFFF: 735 offset = 4 * dk_size 736 else: 737 offset = 8 * dk_size 738 739 ent_addr = keys['dk_indices'].address 740 ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset 741 ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer() 742 ent_addr = ent_addr.cast(ent_ptr_t) 743 744 return ent_addr, dk_nentries 745 746 747class PyListObjectPtr(PyObjectPtr): 748 _typename = 'PyListObject' 749 750 def __getitem__(self, i): 751 # Get the gdb.Value for the (PyObject*) with the given index: 752 field_ob_item = self.field('ob_item') 753 return field_ob_item[i] 754 755 def proxyval(self, visited): 756 # Guard against infinite loops: 757 if self.as_address() in visited: 758 return ProxyAlreadyVisited('[...]') 759 visited.add(self.as_address()) 760 761 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 762 for i in safe_range(int_from_int(self.field('ob_size')))] 763 return result 764 765 def write_repr(self, out, visited): 766 # Guard against infinite loops: 767 if self.as_address() in visited: 768 out.write('[...]') 769 return 770 visited.add(self.as_address()) 771 772 out.write('[') 773 for i in safe_range(int_from_int(self.field('ob_size'))): 774 if i > 0: 775 out.write(', ') 776 element = PyObjectPtr.from_pyobject_ptr(self[i]) 777 element.write_repr(out, visited) 778 out.write(']') 779 780class PyLongObjectPtr(PyObjectPtr): 781 _typename = 'PyLongObject' 782 783 def proxyval(self, visited): 784 ''' 785 Python's Include/longobjrep.h has this declaration: 786 struct _longobject { 787 PyObject_VAR_HEAD 788 digit ob_digit[1]; 789 }; 790 791 with this description: 792 The absolute value of a number is equal to 793 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 794 Negative numbers are represented with ob_size < 0; 795 zero is represented by ob_size == 0. 796 797 where SHIFT can be either: 798 #define PyLong_SHIFT 30 799 #define PyLong_SHIFT 15 800 ''' 801 ob_size = long(self.field('ob_size')) 802 if ob_size == 0: 803 return 0 804 805 ob_digit = self.field('ob_digit') 806 807 if gdb.lookup_type('digit').sizeof == 2: 808 SHIFT = 15 809 else: 810 SHIFT = 30 811 812 digits = [long(ob_digit[i]) * 2**(SHIFT*i) 813 for i in safe_range(abs(ob_size))] 814 result = sum(digits) 815 if ob_size < 0: 816 result = -result 817 return result 818 819 def write_repr(self, out, visited): 820 # Write this out as a Python 3 int literal, i.e. without the "L" suffix 821 proxy = self.proxyval(visited) 822 out.write("%s" % proxy) 823 824 825class PyBoolObjectPtr(PyLongObjectPtr): 826 """ 827 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 828 <bool> instances (Py_True/Py_False) within the process being debugged. 829 """ 830 def proxyval(self, visited): 831 if PyLongObjectPtr.proxyval(self, visited): 832 return True 833 else: 834 return False 835 836class PyNoneStructPtr(PyObjectPtr): 837 """ 838 Class wrapping a gdb.Value that's a PyObject* pointing to the 839 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 840 """ 841 _typename = 'PyObject' 842 843 def proxyval(self, visited): 844 return None 845 846 847class PyFrameObjectPtr(PyObjectPtr): 848 _typename = 'PyFrameObject' 849 850 def __init__(self, gdbval, cast_to=None): 851 PyObjectPtr.__init__(self, gdbval, cast_to) 852 853 if not self.is_optimized_out(): 854 self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) 855 self.co_name = self.co.pyop_field('co_name') 856 self.co_filename = self.co.pyop_field('co_filename') 857 858 self.f_lineno = int_from_int(self.field('f_lineno')) 859 self.f_lasti = int_from_int(self.field('f_lasti')) 860 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 861 self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) 862 863 def iter_locals(self): 864 ''' 865 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 866 the local variables of this frame 867 ''' 868 if self.is_optimized_out(): 869 return 870 871 f_localsplus = self.field('f_localsplus') 872 for i in safe_range(self.co_nlocals): 873 pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) 874 if not pyop_value.is_null(): 875 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) 876 yield (pyop_name, pyop_value) 877 878 def iter_globals(self): 879 ''' 880 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 881 the global variables of this frame 882 ''' 883 if self.is_optimized_out(): 884 return () 885 886 pyop_globals = self.pyop_field('f_globals') 887 return pyop_globals.iteritems() 888 889 def iter_builtins(self): 890 ''' 891 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 892 the builtin variables 893 ''' 894 if self.is_optimized_out(): 895 return () 896 897 pyop_builtins = self.pyop_field('f_builtins') 898 return pyop_builtins.iteritems() 899 900 def get_var_by_name(self, name): 901 ''' 902 Look for the named local variable, returning a (PyObjectPtr, scope) pair 903 where scope is a string 'local', 'global', 'builtin' 904 905 If not found, return (None, None) 906 ''' 907 for pyop_name, pyop_value in self.iter_locals(): 908 if name == pyop_name.proxyval(set()): 909 return pyop_value, 'local' 910 for pyop_name, pyop_value in self.iter_globals(): 911 if name == pyop_name.proxyval(set()): 912 return pyop_value, 'global' 913 for pyop_name, pyop_value in self.iter_builtins(): 914 if name == pyop_name.proxyval(set()): 915 return pyop_value, 'builtin' 916 return None, None 917 918 def filename(self): 919 '''Get the path of the current Python source file, as a string''' 920 if self.is_optimized_out(): 921 return '(frame information optimized out)' 922 return self.co_filename.proxyval(set()) 923 924 def current_line_num(self): 925 '''Get current line number as an integer (1-based) 926 927 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 928 929 See Objects/lnotab_notes.txt 930 ''' 931 if self.is_optimized_out(): 932 return None 933 f_trace = self.field('f_trace') 934 if long(f_trace) != 0: 935 # we have a non-NULL f_trace: 936 return self.f_lineno 937 938 try: 939 return self.co.addr2line(self.f_lasti) 940 except Exception: 941 # bpo-34989: addr2line() is a complex function, it can fail in many 942 # ways. For example, it fails with a TypeError on "FakeRepr" if 943 # gdb fails to load debug symbols. Use a catch-all "except 944 # Exception" to make the whole function safe. The caller has to 945 # handle None anyway for optimized Python. 946 return None 947 948 def current_line(self): 949 '''Get the text of the current source line as a string, with a trailing 950 newline character''' 951 if self.is_optimized_out(): 952 return '(frame information optimized out)' 953 954 lineno = self.current_line_num() 955 if lineno is None: 956 return '(failed to get frame line number)' 957 958 filename = self.filename() 959 try: 960 with open(os_fsencode(filename), 'r') as fp: 961 lines = fp.readlines() 962 except IOError: 963 return None 964 965 try: 966 # Convert from 1-based current_line_num to 0-based list offset 967 return lines[lineno - 1] 968 except IndexError: 969 return None 970 971 def write_repr(self, out, visited): 972 if self.is_optimized_out(): 973 out.write('(frame information optimized out)') 974 return 975 lineno = self.current_line_num() 976 lineno = str(lineno) if lineno is not None else "?" 977 out.write('Frame 0x%x, for file %s, line %s, in %s (' 978 % (self.as_address(), 979 self.co_filename.proxyval(visited), 980 lineno, 981 self.co_name.proxyval(visited))) 982 first = True 983 for pyop_name, pyop_value in self.iter_locals(): 984 if not first: 985 out.write(', ') 986 first = False 987 988 out.write(pyop_name.proxyval(visited)) 989 out.write('=') 990 pyop_value.write_repr(out, visited) 991 992 out.write(')') 993 994 def print_traceback(self): 995 if self.is_optimized_out(): 996 sys.stdout.write(' (frame information optimized out)\n') 997 return 998 visited = set() 999 lineno = self.current_line_num() 1000 lineno = str(lineno) if lineno is not None else "?" 1001 sys.stdout.write(' File "%s", line %s, in %s\n' 1002 % (self.co_filename.proxyval(visited), 1003 lineno, 1004 self.co_name.proxyval(visited))) 1005 1006class PySetObjectPtr(PyObjectPtr): 1007 _typename = 'PySetObject' 1008 1009 @classmethod 1010 def _dummy_key(self): 1011 return gdb.lookup_global_symbol('_PySet_Dummy').value() 1012 1013 def __iter__(self): 1014 dummy_ptr = self._dummy_key() 1015 table = self.field('table') 1016 for i in safe_range(self.field('mask') + 1): 1017 setentry = table[i] 1018 key = setentry['key'] 1019 if key != 0 and key != dummy_ptr: 1020 yield PyObjectPtr.from_pyobject_ptr(key) 1021 1022 def proxyval(self, visited): 1023 # Guard against infinite loops: 1024 if self.as_address() in visited: 1025 return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) 1026 visited.add(self.as_address()) 1027 1028 members = (key.proxyval(visited) for key in self) 1029 if self.safe_tp_name() == 'frozenset': 1030 return frozenset(members) 1031 else: 1032 return set(members) 1033 1034 def write_repr(self, out, visited): 1035 # Emulate Python 3's set_repr 1036 tp_name = self.safe_tp_name() 1037 1038 # Guard against infinite loops: 1039 if self.as_address() in visited: 1040 out.write('(...)') 1041 return 1042 visited.add(self.as_address()) 1043 1044 # Python 3's set_repr special-cases the empty set: 1045 if not self.field('used'): 1046 out.write(tp_name) 1047 out.write('()') 1048 return 1049 1050 # Python 3 uses {} for set literals: 1051 if tp_name != 'set': 1052 out.write(tp_name) 1053 out.write('(') 1054 1055 out.write('{') 1056 first = True 1057 for key in self: 1058 if not first: 1059 out.write(', ') 1060 first = False 1061 key.write_repr(out, visited) 1062 out.write('}') 1063 1064 if tp_name != 'set': 1065 out.write(')') 1066 1067 1068class PyBytesObjectPtr(PyObjectPtr): 1069 _typename = 'PyBytesObject' 1070 1071 def __str__(self): 1072 field_ob_size = self.field('ob_size') 1073 field_ob_sval = self.field('ob_sval') 1074 char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr()) 1075 return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) 1076 1077 def proxyval(self, visited): 1078 return str(self) 1079 1080 def write_repr(self, out, visited): 1081 # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix 1082 1083 # Get a PyStringObject* within the Python 2 gdb process: 1084 proxy = self.proxyval(visited) 1085 1086 # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr 1087 # to Python 2 code: 1088 quote = "'" 1089 if "'" in proxy and not '"' in proxy: 1090 quote = '"' 1091 out.write('b') 1092 out.write(quote) 1093 for byte in proxy: 1094 if byte == quote or byte == '\\': 1095 out.write('\\') 1096 out.write(byte) 1097 elif byte == '\t': 1098 out.write('\\t') 1099 elif byte == '\n': 1100 out.write('\\n') 1101 elif byte == '\r': 1102 out.write('\\r') 1103 elif byte < ' ' or ord(byte) >= 0x7f: 1104 out.write('\\x') 1105 out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) 1106 out.write(hexdigits[ord(byte) & 0xf]) 1107 else: 1108 out.write(byte) 1109 out.write(quote) 1110 1111class PyTupleObjectPtr(PyObjectPtr): 1112 _typename = 'PyTupleObject' 1113 1114 def __getitem__(self, i): 1115 # Get the gdb.Value for the (PyObject*) with the given index: 1116 field_ob_item = self.field('ob_item') 1117 return field_ob_item[i] 1118 1119 def proxyval(self, visited): 1120 # Guard against infinite loops: 1121 if self.as_address() in visited: 1122 return ProxyAlreadyVisited('(...)') 1123 visited.add(self.as_address()) 1124 1125 result = tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 1126 for i in safe_range(int_from_int(self.field('ob_size')))) 1127 return result 1128 1129 def write_repr(self, out, visited): 1130 # Guard against infinite loops: 1131 if self.as_address() in visited: 1132 out.write('(...)') 1133 return 1134 visited.add(self.as_address()) 1135 1136 out.write('(') 1137 for i in safe_range(int_from_int(self.field('ob_size'))): 1138 if i > 0: 1139 out.write(', ') 1140 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1141 element.write_repr(out, visited) 1142 if self.field('ob_size') == 1: 1143 out.write(',)') 1144 else: 1145 out.write(')') 1146 1147class PyTypeObjectPtr(PyObjectPtr): 1148 _typename = 'PyTypeObject' 1149 1150 1151def _unichr_is_printable(char): 1152 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py 1153 if char == u" ": 1154 return True 1155 import unicodedata 1156 return unicodedata.category(char) not in ("C", "Z") 1157 1158if sys.maxunicode >= 0x10000: 1159 _unichr = unichr 1160else: 1161 # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb 1162 def _unichr(x): 1163 if x < 0x10000: 1164 return unichr(x) 1165 x -= 0x10000 1166 ch1 = 0xD800 | (x >> 10) 1167 ch2 = 0xDC00 | (x & 0x3FF) 1168 return unichr(ch1) + unichr(ch2) 1169 1170 1171class PyUnicodeObjectPtr(PyObjectPtr): 1172 _typename = 'PyUnicodeObject' 1173 1174 def char_width(self): 1175 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1176 return _type_Py_UNICODE.sizeof 1177 1178 def proxyval(self, visited): 1179 global _is_pep393 1180 if _is_pep393 is None: 1181 fields = gdb.lookup_type('PyUnicodeObject').fields() 1182 _is_pep393 = 'data' in [f.name for f in fields] 1183 if _is_pep393: 1184 # Python 3.3 and newer 1185 may_have_surrogates = False 1186 compact = self.field('_base') 1187 ascii = compact['_base'] 1188 state = ascii['state'] 1189 is_compact_ascii = (int(state['ascii']) and int(state['compact'])) 1190 if not int(state['ready']): 1191 # string is not ready 1192 field_length = long(compact['wstr_length']) 1193 may_have_surrogates = True 1194 field_str = ascii['wstr'] 1195 else: 1196 field_length = long(ascii['length']) 1197 if is_compact_ascii: 1198 field_str = ascii.address + 1 1199 elif int(state['compact']): 1200 field_str = compact.address + 1 1201 else: 1202 field_str = self.field('data')['any'] 1203 repr_kind = int(state['kind']) 1204 if repr_kind == 1: 1205 field_str = field_str.cast(_type_unsigned_char_ptr()) 1206 elif repr_kind == 2: 1207 field_str = field_str.cast(_type_unsigned_short_ptr()) 1208 elif repr_kind == 4: 1209 field_str = field_str.cast(_type_unsigned_int_ptr()) 1210 else: 1211 # Python 3.2 and earlier 1212 field_length = long(self.field('length')) 1213 field_str = self.field('str') 1214 may_have_surrogates = self.char_width() == 2 1215 1216 # Gather a list of ints from the Py_UNICODE array; these are either 1217 # UCS-1, UCS-2 or UCS-4 code points: 1218 if not may_have_surrogates: 1219 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1220 else: 1221 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1222 # inferior process: we must join surrogate pairs. 1223 Py_UNICODEs = [] 1224 i = 0 1225 limit = safety_limit(field_length) 1226 while i < limit: 1227 ucs = int(field_str[i]) 1228 i += 1 1229 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1230 Py_UNICODEs.append(ucs) 1231 continue 1232 # This could be a surrogate pair. 1233 ucs2 = int(field_str[i]) 1234 if ucs2 < 0xDC00 or ucs2 > 0xDFFF: 1235 continue 1236 code = (ucs & 0x03FF) << 10 1237 code |= ucs2 & 0x03FF 1238 code += 0x00010000 1239 Py_UNICODEs.append(code) 1240 i += 1 1241 1242 # Convert the int code points to unicode characters, and generate a 1243 # local unicode instance. 1244 # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). 1245 result = u''.join([ 1246 (_unichr(ucs) if ucs <= 0x10ffff else '\ufffd') 1247 for ucs in Py_UNICODEs]) 1248 return result 1249 1250 def write_repr(self, out, visited): 1251 # Write this out as a Python 3 str literal, i.e. without a "u" prefix 1252 1253 # Get a PyUnicodeObject* within the Python 2 gdb process: 1254 proxy = self.proxyval(visited) 1255 1256 # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr 1257 # to Python 2: 1258 if "'" in proxy and '"' not in proxy: 1259 quote = '"' 1260 else: 1261 quote = "'" 1262 out.write(quote) 1263 1264 i = 0 1265 while i < len(proxy): 1266 ch = proxy[i] 1267 i += 1 1268 1269 # Escape quotes and backslashes 1270 if ch == quote or ch == '\\': 1271 out.write('\\') 1272 out.write(ch) 1273 1274 # Map special whitespace to '\t', \n', '\r' 1275 elif ch == '\t': 1276 out.write('\\t') 1277 elif ch == '\n': 1278 out.write('\\n') 1279 elif ch == '\r': 1280 out.write('\\r') 1281 1282 # Map non-printable US ASCII to '\xhh' */ 1283 elif ch < ' ' or ch == 0x7F: 1284 out.write('\\x') 1285 out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) 1286 out.write(hexdigits[ord(ch) & 0x000F]) 1287 1288 # Copy ASCII characters as-is 1289 elif ord(ch) < 0x7F: 1290 out.write(ch) 1291 1292 # Non-ASCII characters 1293 else: 1294 ucs = ch 1295 ch2 = None 1296 if sys.maxunicode < 0x10000: 1297 # If sizeof(Py_UNICODE) is 2 here (in gdb), join 1298 # surrogate pairs before calling _unichr_is_printable. 1299 if (i < len(proxy) 1300 and 0xD800 <= ord(ch) < 0xDC00 \ 1301 and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): 1302 ch2 = proxy[i] 1303 ucs = ch + ch2 1304 i += 1 1305 1306 # Unfortuately, Python 2's unicode type doesn't seem 1307 # to expose the "isprintable" method 1308 printable = _unichr_is_printable(ucs) 1309 if printable: 1310 try: 1311 ucs.encode(ENCODING) 1312 except UnicodeEncodeError: 1313 printable = False 1314 1315 # Map Unicode whitespace and control characters 1316 # (categories Z* and C* except ASCII space) 1317 if not printable: 1318 if ch2 is not None: 1319 # Match Python 3's representation of non-printable 1320 # wide characters. 1321 code = (ord(ch) & 0x03FF) << 10 1322 code |= ord(ch2) & 0x03FF 1323 code += 0x00010000 1324 else: 1325 code = ord(ucs) 1326 1327 # Map 8-bit characters to '\\xhh' 1328 if code <= 0xff: 1329 out.write('\\x') 1330 out.write(hexdigits[(code >> 4) & 0x000F]) 1331 out.write(hexdigits[code & 0x000F]) 1332 # Map 21-bit characters to '\U00xxxxxx' 1333 elif code >= 0x10000: 1334 out.write('\\U') 1335 out.write(hexdigits[(code >> 28) & 0x0000000F]) 1336 out.write(hexdigits[(code >> 24) & 0x0000000F]) 1337 out.write(hexdigits[(code >> 20) & 0x0000000F]) 1338 out.write(hexdigits[(code >> 16) & 0x0000000F]) 1339 out.write(hexdigits[(code >> 12) & 0x0000000F]) 1340 out.write(hexdigits[(code >> 8) & 0x0000000F]) 1341 out.write(hexdigits[(code >> 4) & 0x0000000F]) 1342 out.write(hexdigits[code & 0x0000000F]) 1343 # Map 16-bit characters to '\uxxxx' 1344 else: 1345 out.write('\\u') 1346 out.write(hexdigits[(code >> 12) & 0x000F]) 1347 out.write(hexdigits[(code >> 8) & 0x000F]) 1348 out.write(hexdigits[(code >> 4) & 0x000F]) 1349 out.write(hexdigits[code & 0x000F]) 1350 else: 1351 # Copy characters as-is 1352 out.write(ch) 1353 if ch2 is not None: 1354 out.write(ch2) 1355 1356 out.write(quote) 1357 1358 1359class wrapperobject(PyObjectPtr): 1360 _typename = 'wrapperobject' 1361 1362 def safe_name(self): 1363 try: 1364 name = self.field('descr')['d_base']['name'].string() 1365 return repr(name) 1366 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 1367 return '<unknown name>' 1368 1369 def safe_tp_name(self): 1370 try: 1371 return self.field('self')['ob_type']['tp_name'].string() 1372 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 1373 return '<unknown tp_name>' 1374 1375 def safe_self_addresss(self): 1376 try: 1377 address = long(self.field('self')) 1378 return '%#x' % address 1379 except (NullPyObjectPtr, RuntimeError): 1380 return '<failed to get self address>' 1381 1382 def proxyval(self, visited): 1383 name = self.safe_name() 1384 tp_name = self.safe_tp_name() 1385 self_address = self.safe_self_addresss() 1386 return ("<method-wrapper %s of %s object at %s>" 1387 % (name, tp_name, self_address)) 1388 1389 def write_repr(self, out, visited): 1390 proxy = self.proxyval(visited) 1391 out.write(proxy) 1392 1393 1394def int_from_int(gdbval): 1395 return int(str(gdbval)) 1396 1397 1398def stringify(val): 1399 # TODO: repr() puts everything on one line; pformat can be nicer, but 1400 # can lead to v.long results; this function isolates the choice 1401 if True: 1402 return repr(val) 1403 else: 1404 from pprint import pformat 1405 return pformat(val) 1406 1407 1408class PyObjectPtrPrinter: 1409 "Prints a (PyObject*)" 1410 1411 def __init__ (self, gdbval): 1412 self.gdbval = gdbval 1413 1414 def to_string (self): 1415 pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) 1416 if True: 1417 return pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1418 else: 1419 # Generate full proxy value then stringify it. 1420 # Doing so could be expensive 1421 proxyval = pyop.proxyval(set()) 1422 return stringify(proxyval) 1423 1424def pretty_printer_lookup(gdbval): 1425 type = gdbval.type.unqualified() 1426 if type.code != gdb.TYPE_CODE_PTR: 1427 return None 1428 1429 type = type.target().unqualified() 1430 t = str(type) 1431 if t in ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject"): 1432 return PyObjectPtrPrinter(gdbval) 1433 1434""" 1435During development, I've been manually invoking the code in this way: 1436(gdb) python 1437 1438import sys 1439sys.path.append('/home/david/coding/python-gdb') 1440import libpython 1441end 1442 1443then reloading it after each edit like this: 1444(gdb) python reload(libpython) 1445 1446The following code should ensure that the prettyprinter is registered 1447if the code is autoloaded by gdb when visiting libpython.so, provided 1448that this python file is installed to the same path as the library (or its 1449.debug file) plus a "-gdb.py" suffix, e.g: 1450 /usr/lib/libpython2.6.so.1.0-gdb.py 1451 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py 1452""" 1453def register (obj): 1454 if obj is None: 1455 obj = gdb 1456 1457 # Wire up the pretty-printer 1458 obj.pretty_printers.append(pretty_printer_lookup) 1459 1460register (gdb.current_objfile ()) 1461 1462 1463 1464# Unfortunately, the exact API exposed by the gdb module varies somewhat 1465# from build to build 1466# See http://bugs.python.org/issue8279?#msg102276 1467 1468class Frame(object): 1469 ''' 1470 Wrapper for gdb.Frame, adding various methods 1471 ''' 1472 def __init__(self, gdbframe): 1473 self._gdbframe = gdbframe 1474 1475 def older(self): 1476 older = self._gdbframe.older() 1477 if older: 1478 return Frame(older) 1479 else: 1480 return None 1481 1482 def newer(self): 1483 newer = self._gdbframe.newer() 1484 if newer: 1485 return Frame(newer) 1486 else: 1487 return None 1488 1489 def select(self): 1490 '''If supported, select this frame and return True; return False if unsupported 1491 1492 Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12 1493 onwards, but absent on Ubuntu buildbot''' 1494 if not hasattr(self._gdbframe, 'select'): 1495 print ('Unable to select frame: ' 1496 'this build of gdb does not expose a gdb.Frame.select method') 1497 return False 1498 self._gdbframe.select() 1499 return True 1500 1501 def get_index(self): 1502 '''Calculate index of frame, starting at 0 for the newest frame within 1503 this thread''' 1504 index = 0 1505 # Go down until you reach the newest frame: 1506 iter_frame = self 1507 while iter_frame.newer(): 1508 index += 1 1509 iter_frame = iter_frame.newer() 1510 return index 1511 1512 # We divide frames into: 1513 # - "python frames": 1514 # - "bytecode frames" i.e. PyEval_EvalFrameEx 1515 # - "other python frames": things that are of interest from a python 1516 # POV, but aren't bytecode (e.g. GC, GIL) 1517 # - everything else 1518 1519 def is_python_frame(self): 1520 '''Is this a _PyEval_EvalFrameDefault frame, or some other important 1521 frame? (see is_other_python_frame for what "important" means in this 1522 context)''' 1523 if self.is_evalframe(): 1524 return True 1525 if self.is_other_python_frame(): 1526 return True 1527 return False 1528 1529 def is_evalframe(self): 1530 '''Is this a _PyEval_EvalFrameDefault frame?''' 1531 if self._gdbframe.name() == EVALFRAME: 1532 ''' 1533 I believe we also need to filter on the inline 1534 struct frame_id.inline_depth, only regarding frames with 1535 an inline depth of 0 as actually being this function 1536 1537 So we reject those with type gdb.INLINE_FRAME 1538 ''' 1539 if self._gdbframe.type() == gdb.NORMAL_FRAME: 1540 # We have a _PyEval_EvalFrameDefault frame: 1541 return True 1542 1543 return False 1544 1545 def is_other_python_frame(self): 1546 '''Is this frame worth displaying in python backtraces? 1547 Examples: 1548 - waiting on the GIL 1549 - garbage-collecting 1550 - within a CFunction 1551 If it is, return a descriptive string 1552 For other frames, return False 1553 ''' 1554 if self.is_waiting_for_gil(): 1555 return 'Waiting for the GIL' 1556 1557 if self.is_gc_collect(): 1558 return 'Garbage-collecting' 1559 1560 # Detect invocations of PyCFunction instances: 1561 frame = self._gdbframe 1562 caller = frame.name() 1563 if not caller: 1564 return False 1565 1566 if caller in ('_PyCFunction_FastCallDict', 1567 '_PyCFunction_FastCallKeywords'): 1568 arg_name = 'func' 1569 # Within that frame: 1570 # "func" is the local containing the PyObject* of the 1571 # PyCFunctionObject instance 1572 # "f" is the same value, but cast to (PyCFunctionObject*) 1573 # "self" is the (PyObject*) of the 'self' 1574 try: 1575 # Use the prettyprinter for the func: 1576 func = frame.read_var(arg_name) 1577 return str(func) 1578 except ValueError: 1579 return ('PyCFunction invocation (unable to read %s: ' 1580 'missing debuginfos?)' % arg_name) 1581 except RuntimeError: 1582 return 'PyCFunction invocation (unable to read %s)' % arg_name 1583 1584 if caller == 'wrapper_call': 1585 arg_name = 'wp' 1586 try: 1587 func = frame.read_var(arg_name) 1588 return str(func) 1589 except ValueError: 1590 return ('<wrapper_call invocation (unable to read %s: ' 1591 'missing debuginfos?)>' % arg_name) 1592 except RuntimeError: 1593 return '<wrapper_call invocation (unable to read %s)>' % arg_name 1594 1595 # This frame isn't worth reporting: 1596 return False 1597 1598 def is_waiting_for_gil(self): 1599 '''Is this frame waiting on the GIL?''' 1600 # This assumes the _POSIX_THREADS version of Python/ceval_gil.h: 1601 name = self._gdbframe.name() 1602 if name: 1603 return 'pthread_cond_timedwait' in name 1604 1605 def is_gc_collect(self): 1606 '''Is this frame "collect" within the garbage-collector?''' 1607 return self._gdbframe.name() == 'collect' 1608 1609 def get_pyop(self): 1610 try: 1611 f = self._gdbframe.read_var('f') 1612 frame = PyFrameObjectPtr.from_pyobject_ptr(f) 1613 if not frame.is_optimized_out(): 1614 return frame 1615 # gdb is unable to get the "f" argument of PyEval_EvalFrameEx() 1616 # because it was "optimized out". Try to get "f" from the frame 1617 # of the caller, PyEval_EvalCodeEx(). 1618 orig_frame = frame 1619 caller = self._gdbframe.older() 1620 if caller: 1621 f = caller.read_var('f') 1622 frame = PyFrameObjectPtr.from_pyobject_ptr(f) 1623 if not frame.is_optimized_out(): 1624 return frame 1625 return orig_frame 1626 except ValueError: 1627 return None 1628 1629 @classmethod 1630 def get_selected_frame(cls): 1631 _gdbframe = gdb.selected_frame() 1632 if _gdbframe: 1633 return Frame(_gdbframe) 1634 return None 1635 1636 @classmethod 1637 def get_selected_python_frame(cls): 1638 '''Try to obtain the Frame for the python-related code in the selected 1639 frame, or None''' 1640 try: 1641 frame = cls.get_selected_frame() 1642 except gdb.error: 1643 # No frame: Python didn't start yet 1644 return None 1645 1646 while frame: 1647 if frame.is_python_frame(): 1648 return frame 1649 frame = frame.older() 1650 1651 # Not found: 1652 return None 1653 1654 @classmethod 1655 def get_selected_bytecode_frame(cls): 1656 '''Try to obtain the Frame for the python bytecode interpreter in the 1657 selected GDB frame, or None''' 1658 frame = cls.get_selected_frame() 1659 1660 while frame: 1661 if frame.is_evalframe(): 1662 return frame 1663 frame = frame.older() 1664 1665 # Not found: 1666 return None 1667 1668 def print_summary(self): 1669 if self.is_evalframe(): 1670 pyop = self.get_pyop() 1671 if pyop: 1672 line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1673 write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) 1674 if not pyop.is_optimized_out(): 1675 line = pyop.current_line() 1676 if line is not None: 1677 sys.stdout.write(' %s\n' % line.strip()) 1678 else: 1679 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) 1680 else: 1681 info = self.is_other_python_frame() 1682 if info: 1683 sys.stdout.write('#%i %s\n' % (self.get_index(), info)) 1684 else: 1685 sys.stdout.write('#%i\n' % self.get_index()) 1686 1687 def print_traceback(self): 1688 if self.is_evalframe(): 1689 pyop = self.get_pyop() 1690 if pyop: 1691 pyop.print_traceback() 1692 if not pyop.is_optimized_out(): 1693 line = pyop.current_line() 1694 if line is not None: 1695 sys.stdout.write(' %s\n' % line.strip()) 1696 else: 1697 sys.stdout.write(' (unable to read python frame information)\n') 1698 else: 1699 info = self.is_other_python_frame() 1700 if info: 1701 sys.stdout.write(' %s\n' % info) 1702 else: 1703 sys.stdout.write(' (not a python frame)\n') 1704 1705class PyList(gdb.Command): 1706 '''List the current Python source code, if any 1707 1708 Use 1709 py-list START 1710 to list at a different line number within the python source. 1711 1712 Use 1713 py-list START, END 1714 to list a specific range of lines within the python source. 1715 ''' 1716 1717 def __init__(self): 1718 gdb.Command.__init__ (self, 1719 "py-list", 1720 gdb.COMMAND_FILES, 1721 gdb.COMPLETE_NONE) 1722 1723 1724 def invoke(self, args, from_tty): 1725 import re 1726 1727 start = None 1728 end = None 1729 1730 m = re.match(r'\s*(\d+)\s*', args) 1731 if m: 1732 start = int(m.group(0)) 1733 end = start + 10 1734 1735 m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args) 1736 if m: 1737 start, end = map(int, m.groups()) 1738 1739 # py-list requires an actual PyEval_EvalFrameEx frame: 1740 frame = Frame.get_selected_bytecode_frame() 1741 if not frame: 1742 print('Unable to locate gdb frame for python bytecode interpreter') 1743 return 1744 1745 pyop = frame.get_pyop() 1746 if not pyop or pyop.is_optimized_out(): 1747 print('Unable to read information on python frame') 1748 return 1749 1750 filename = pyop.filename() 1751 lineno = pyop.current_line_num() 1752 if lineno is None: 1753 print('Unable to read python frame line number') 1754 return 1755 1756 if start is None: 1757 start = lineno - 5 1758 end = lineno + 5 1759 1760 if start<1: 1761 start = 1 1762 1763 try: 1764 f = open(os_fsencode(filename), 'r') 1765 except IOError as err: 1766 sys.stdout.write('Unable to open %s: %s\n' 1767 % (filename, err)) 1768 return 1769 with f: 1770 all_lines = f.readlines() 1771 # start and end are 1-based, all_lines is 0-based; 1772 # so [start-1:end] as a python slice gives us [start, end] as a 1773 # closed interval 1774 for i, line in enumerate(all_lines[start-1:end]): 1775 linestr = str(i+start) 1776 # Highlight current line: 1777 if i + start == lineno: 1778 linestr = '>' + linestr 1779 sys.stdout.write('%4s %s' % (linestr, line)) 1780 1781 1782# ...and register the command: 1783PyList() 1784 1785def move_in_stack(move_up): 1786 '''Move up or down the stack (for the py-up/py-down command)''' 1787 frame = Frame.get_selected_python_frame() 1788 if not frame: 1789 print('Unable to locate python frame') 1790 return 1791 1792 while frame: 1793 if move_up: 1794 iter_frame = frame.older() 1795 else: 1796 iter_frame = frame.newer() 1797 1798 if not iter_frame: 1799 break 1800 1801 if iter_frame.is_python_frame(): 1802 # Result: 1803 if iter_frame.select(): 1804 iter_frame.print_summary() 1805 return 1806 1807 frame = iter_frame 1808 1809 if move_up: 1810 print('Unable to find an older python frame') 1811 else: 1812 print('Unable to find a newer python frame') 1813 1814class PyUp(gdb.Command): 1815 'Select and print the python stack frame that called this one (if any)' 1816 def __init__(self): 1817 gdb.Command.__init__ (self, 1818 "py-up", 1819 gdb.COMMAND_STACK, 1820 gdb.COMPLETE_NONE) 1821 1822 1823 def invoke(self, args, from_tty): 1824 move_in_stack(move_up=True) 1825 1826class PyDown(gdb.Command): 1827 'Select and print the python stack frame called by this one (if any)' 1828 def __init__(self): 1829 gdb.Command.__init__ (self, 1830 "py-down", 1831 gdb.COMMAND_STACK, 1832 gdb.COMPLETE_NONE) 1833 1834 1835 def invoke(self, args, from_tty): 1836 move_in_stack(move_up=False) 1837 1838# Not all builds of gdb have gdb.Frame.select 1839if hasattr(gdb.Frame, 'select'): 1840 PyUp() 1841 PyDown() 1842 1843class PyBacktraceFull(gdb.Command): 1844 'Display the current python frame and all the frames within its call stack (if any)' 1845 def __init__(self): 1846 gdb.Command.__init__ (self, 1847 "py-bt-full", 1848 gdb.COMMAND_STACK, 1849 gdb.COMPLETE_NONE) 1850 1851 1852 def invoke(self, args, from_tty): 1853 frame = Frame.get_selected_python_frame() 1854 if not frame: 1855 print('Unable to locate python frame') 1856 return 1857 1858 while frame: 1859 if frame.is_python_frame(): 1860 frame.print_summary() 1861 frame = frame.older() 1862 1863PyBacktraceFull() 1864 1865class PyBacktrace(gdb.Command): 1866 'Display the current python frame and all the frames within its call stack (if any)' 1867 def __init__(self): 1868 gdb.Command.__init__ (self, 1869 "py-bt", 1870 gdb.COMMAND_STACK, 1871 gdb.COMPLETE_NONE) 1872 1873 1874 def invoke(self, args, from_tty): 1875 frame = Frame.get_selected_python_frame() 1876 if not frame: 1877 print('Unable to locate python frame') 1878 return 1879 1880 sys.stdout.write('Traceback (most recent call first):\n') 1881 while frame: 1882 if frame.is_python_frame(): 1883 frame.print_traceback() 1884 frame = frame.older() 1885 1886PyBacktrace() 1887 1888class PyPrint(gdb.Command): 1889 'Look up the given python variable name, and print it' 1890 def __init__(self): 1891 gdb.Command.__init__ (self, 1892 "py-print", 1893 gdb.COMMAND_DATA, 1894 gdb.COMPLETE_NONE) 1895 1896 1897 def invoke(self, args, from_tty): 1898 name = str(args) 1899 1900 frame = Frame.get_selected_python_frame() 1901 if not frame: 1902 print('Unable to locate python frame') 1903 return 1904 1905 pyop_frame = frame.get_pyop() 1906 if not pyop_frame: 1907 print('Unable to read information on python frame') 1908 return 1909 1910 pyop_var, scope = pyop_frame.get_var_by_name(name) 1911 1912 if pyop_var: 1913 print('%s %r = %s' 1914 % (scope, 1915 name, 1916 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) 1917 else: 1918 print('%r not found' % name) 1919 1920PyPrint() 1921 1922class PyLocals(gdb.Command): 1923 'Look up the given python variable name, and print it' 1924 def __init__(self): 1925 gdb.Command.__init__ (self, 1926 "py-locals", 1927 gdb.COMMAND_DATA, 1928 gdb.COMPLETE_NONE) 1929 1930 1931 def invoke(self, args, from_tty): 1932 name = str(args) 1933 1934 frame = Frame.get_selected_python_frame() 1935 if not frame: 1936 print('Unable to locate python frame') 1937 return 1938 1939 pyop_frame = frame.get_pyop() 1940 if not pyop_frame: 1941 print('Unable to read information on python frame') 1942 return 1943 1944 for pyop_name, pyop_value in pyop_frame.iter_locals(): 1945 print('%s = %s' 1946 % (pyop_name.proxyval(set()), 1947 pyop_value.get_truncated_repr(MAX_OUTPUT_LEN))) 1948 1949PyLocals() 1950