1#!/usr/bin/python 2''' 3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 4to be extended with Python code e.g. for library-specific data visualizations, 5such as for the C++ STL types. Documentation on this API can be seen at: 6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 7 8 9This python module deals with the case when the process being debugged (the 10"inferior process" in gdb parlance) is itself python, or more specifically, 11linked against libpython. In this situation, almost every item of data is a 12(PyObject*), and having the debugger merely print their addresses is not very 13enlightening. 14 15This module embeds knowledge about the implementation details of libpython so 16that we can emit useful visualizations e.g. a string, a list, a dict, a frame 17giving file/line information and the state of local variables 18 19In particular, given a gdb.Value corresponding to a PyObject* in the inferior 20process, we can generate a "proxy value" within the gdb process. For example, 21given a PyObject* in the inferior process that is in fact a PyListObject* 22holding three PyObject* that turn out to be PyStringObject* instances, we can 23generate a proxy value within the gdb process that is a list of strings: 24 ["foo", "bar", "baz"] 25 26Doing so can be expensive for complicated graphs of objects, and could take 27some time, so we also have a "write_repr" method that writes a representation 28of the data to a file-like object. This allows us to stop the traversal by 29having the file-like object raise an exception if it gets too much data. 30 31With both "proxyval" and "write_repr" we keep track of the set of all addresses 32visited so far in the traversal, to avoid infinite recursion due to cycles in 33the graph of object references. 
34 35We try to defer gdb.lookup_type() invocations for python types until as late as 36possible: for a dynamically linked python binary, when the process starts in 37the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 38the type names are known to the debugger 39 40The module also extends gdb with some python-specific commands. 41''' 42from __future__ import with_statement 43import gdb 44import sys 45 46# Look up the gdb.Type for some standard types: 47_type_char_ptr = gdb.lookup_type('char').pointer() # char* 48_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char* 49_type_void_ptr = gdb.lookup_type('void').pointer() # void* 50_type_size_t = gdb.lookup_type('size_t') 51 52SIZEOF_VOID_P = _type_void_ptr.sizeof 53 54 55Py_TPFLAGS_HEAPTYPE = (1L << 9) 56 57Py_TPFLAGS_INT_SUBCLASS = (1L << 23) 58Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) 59Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) 60Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) 61Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) 62Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) 63Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) 64Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) 65Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) 66 67 68MAX_OUTPUT_LEN=1024 69 70class NullPyObjectPtr(RuntimeError): 71 pass 72 73 74def safety_limit(val): 75 # Given a integer value from the process being debugged, limit it to some 76 # safety threshold so that arbitrary breakage within said process doesn't 77 # break the gdb process too much (e.g. 
sizes of iterations, sizes of lists) 78 return min(val, 1000) 79 80 81def safe_range(val): 82 # As per range, but don't trust the value too much: cap it to a safety 83 # threshold in case the data was corrupted 84 return xrange(safety_limit(val)) 85 86 87class StringTruncated(RuntimeError): 88 pass 89 90class TruncatedStringIO(object): 91 '''Similar to cStringIO, but can truncate the output by raising a 92 StringTruncated exception''' 93 def __init__(self, maxlen=None): 94 self._val = '' 95 self.maxlen = maxlen 96 97 def write(self, data): 98 if self.maxlen: 99 if len(data) + len(self._val) > self.maxlen: 100 # Truncation: 101 self._val += data[0:self.maxlen - len(self._val)] 102 raise StringTruncated() 103 104 self._val += data 105 106 def getvalue(self): 107 return self._val 108 109class PyObjectPtr(object): 110 """ 111 Class wrapping a gdb.Value that's a either a (PyObject*) within the 112 inferior process, or some subclass pointer e.g. (PyStringObject*) 113 114 There will be a subclass for every refined PyObject type that we care 115 about. 116 117 Note that at every stage the underlying pointer could be NULL, point 118 to corrupt data, etc; this is the debugger, after all. 119 """ 120 _typename = 'PyObject' 121 122 def __init__(self, gdbval, cast_to=None): 123 if cast_to: 124 self._gdbval = gdbval.cast(cast_to) 125 else: 126 self._gdbval = gdbval 127 128 def field(self, name): 129 ''' 130 Get the gdb.Value for the given field within the PyObject, coping with 131 some python 2 versus python 3 differences. 132 133 Various libpython types are defined using the "PyObject_HEAD" and 134 "PyObject_VAR_HEAD" macros. 135 136 In Python 2, this these are defined so that "ob_type" and (for a var 137 object) "ob_size" are fields of the type in question. 
138 139 In Python 3, this is defined as an embedded PyVarObject type thus: 140 PyVarObject ob_base; 141 so that the "ob_size" field is located insize the "ob_base" field, and 142 the "ob_type" is most easily accessed by casting back to a (PyObject*). 143 ''' 144 if self.is_null(): 145 raise NullPyObjectPtr(self) 146 147 if name == 'ob_type': 148 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 149 return pyo_ptr.dereference()[name] 150 151 if name == 'ob_size': 152 try: 153 # Python 2: 154 return self._gdbval.dereference()[name] 155 except RuntimeError: 156 # Python 3: 157 return self._gdbval.dereference()['ob_base'][name] 158 159 # General case: look it up inside the object: 160 return self._gdbval.dereference()[name] 161 162 def pyop_field(self, name): 163 ''' 164 Get a PyObjectPtr for the given PyObject* field within this PyObject, 165 coping with some python 2 versus python 3 differences. 166 ''' 167 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 168 169 def write_field_repr(self, name, out, visited): 170 ''' 171 Extract the PyObject* field named "name", and write its representation 172 to file-like object "out" 173 ''' 174 field_obj = self.pyop_field(name) 175 field_obj.write_repr(out, visited) 176 177 def get_truncated_repr(self, maxlen): 178 ''' 179 Get a repr-like string for the data, but truncate it at "maxlen" bytes 180 (ending the object graph traversal as soon as you do) 181 ''' 182 out = TruncatedStringIO(maxlen) 183 try: 184 self.write_repr(out, set()) 185 except StringTruncated: 186 # Truncation occurred: 187 return out.getvalue() + '...(truncated)' 188 189 # No truncation occurred: 190 return out.getvalue() 191 192 def type(self): 193 return PyTypeObjectPtr(self.field('ob_type')) 194 195 def is_null(self): 196 return 0 == long(self._gdbval) 197 198 def is_optimized_out(self): 199 ''' 200 Is the value of the underlying PyObject* visible to the debugger? 
201 202 This can vary with the precise version of the compiler used to build 203 Python, and the precise version of gdb. 204 205 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 206 PyEval_EvalFrameEx's "f" 207 ''' 208 return self._gdbval.is_optimized_out 209 210 def safe_tp_name(self): 211 try: 212 return self.type().field('tp_name').string() 213 except NullPyObjectPtr: 214 # NULL tp_name? 215 return 'unknown' 216 except RuntimeError: 217 # Can't even read the object at all? 218 return 'unknown' 219 220 def proxyval(self, visited): 221 ''' 222 Scrape a value from the inferior process, and try to represent it 223 within the gdb process, whilst (hopefully) avoiding crashes when 224 the remote data is corrupt. 225 226 Derived classes will override this. 227 228 For example, a PyIntObject* with ob_ival 42 in the inferior process 229 should result in an int(42) in this process. 230 231 visited: a set of all gdb.Value pyobject pointers already visited 232 whilst generating this value (to guard against infinite recursion when 233 visiting object graphs with loops). Analogous to Py_ReprEnter and 234 Py_ReprLeave 235 ''' 236 237 class FakeRepr(object): 238 """ 239 Class representing a non-descript PyObject* value in the inferior 240 process for when we don't have a custom scraper, intended to have 241 a sane repr(). 242 """ 243 244 def __init__(self, tp_name, address): 245 self.tp_name = tp_name 246 self.address = address 247 248 def __repr__(self): 249 # For the NULL pointer, we have no way of knowing a type, so 250 # special-case it as per 251 # http://bugs.python.org/issue8032#msg100882 252 if self.address == 0: 253 return '0x0' 254 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 255 256 return FakeRepr(self.safe_tp_name(), 257 long(self._gdbval)) 258 259 def write_repr(self, out, visited): 260 ''' 261 Write a string representation of the value scraped from the inferior 262 process to "out", a file-like object. 
263 ''' 264 # Default implementation: generate a proxy value and write its repr 265 # However, this could involve a lot of work for complicated objects, 266 # so for derived classes we specialize this 267 return out.write(repr(self.proxyval(visited))) 268 269 @classmethod 270 def subclass_from_type(cls, t): 271 ''' 272 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 273 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 274 to use 275 276 Ideally, we would look up the symbols for the global types, but that 277 isn't working yet: 278 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 279 Traceback (most recent call last): 280 File "<string>", line 1, in <module> 281 NotImplementedError: Symbol type not yet supported in Python scripts. 282 Error while executing Python code. 283 284 For now, we use tp_flags, after doing some string comparisons on the 285 tp_name for some special-cases that don't seem to be visible through 286 flags 287 ''' 288 try: 289 tp_name = t.field('tp_name').string() 290 tp_flags = int(t.field('tp_flags')) 291 except RuntimeError: 292 # Handle any kind of error e.g. 
NULL ptrs by simply using the base 293 # class 294 return cls 295 296 #print 'tp_flags = 0x%08x' % tp_flags 297 #print 'tp_name = %r' % tp_name 298 299 name_map = {'bool': PyBoolObjectPtr, 300 'classobj': PyClassObjectPtr, 301 'instance': PyInstanceObjectPtr, 302 'NoneType': PyNoneStructPtr, 303 'frame': PyFrameObjectPtr, 304 'set' : PySetObjectPtr, 305 'frozenset' : PySetObjectPtr, 306 'builtin_function_or_method' : PyCFunctionObjectPtr, 307 } 308 if tp_name in name_map: 309 return name_map[tp_name] 310 311 if tp_flags & Py_TPFLAGS_HEAPTYPE: 312 return HeapTypeObjectPtr 313 314 if tp_flags & Py_TPFLAGS_INT_SUBCLASS: 315 return PyIntObjectPtr 316 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 317 return PyLongObjectPtr 318 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 319 return PyListObjectPtr 320 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 321 return PyTupleObjectPtr 322 if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: 323 return PyStringObjectPtr 324 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 325 return PyUnicodeObjectPtr 326 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 327 return PyDictObjectPtr 328 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 329 return PyBaseExceptionObjectPtr 330 #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: 331 # return PyTypeObjectPtr 332 333 # Use the base class: 334 return cls 335 336 @classmethod 337 def from_pyobject_ptr(cls, gdbval): 338 ''' 339 Try to locate the appropriate derived class dynamically, and cast 340 the pointer accordingly. 341 ''' 342 try: 343 p = PyObjectPtr(gdbval) 344 cls = cls.subclass_from_type(p.type()) 345 return cls(gdbval, cast_to=cls.get_gdb_type()) 346 except RuntimeError: 347 # Handle any kind of error e.g. 
NULL ptrs by simply using the base 348 # class 349 pass 350 return cls(gdbval) 351 352 @classmethod 353 def get_gdb_type(cls): 354 return gdb.lookup_type(cls._typename).pointer() 355 356 def as_address(self): 357 return long(self._gdbval) 358 359 360class ProxyAlreadyVisited(object): 361 ''' 362 Placeholder proxy to use when protecting against infinite recursion due to 363 loops in the object graph. 364 365 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 366 ''' 367 def __init__(self, rep): 368 self._rep = rep 369 370 def __repr__(self): 371 return self._rep 372 373 374def _write_instance_repr(out, visited, name, pyop_attrdict, address): 375 '''Shared code for use by old-style and new-style classes: 376 write a representation to file-like object "out"''' 377 out.write('<') 378 out.write(name) 379 380 # Write dictionary of instance attributes: 381 if isinstance(pyop_attrdict, PyDictObjectPtr): 382 out.write('(') 383 first = True 384 for pyop_arg, pyop_val in pyop_attrdict.iteritems(): 385 if not first: 386 out.write(', ') 387 first = False 388 out.write(pyop_arg.proxyval(visited)) 389 out.write('=') 390 pyop_val.write_repr(out, visited) 391 out.write(')') 392 out.write(' at remote 0x%x>' % address) 393 394 395class InstanceProxy(object): 396 397 def __init__(self, cl_name, attrdict, address): 398 self.cl_name = cl_name 399 self.attrdict = attrdict 400 self.address = address 401 402 def __repr__(self): 403 if isinstance(self.attrdict, dict): 404 kwargs = ', '.join(["%s=%r" % (arg, val) 405 for arg, val in self.attrdict.iteritems()]) 406 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 407 kwargs, self.address) 408 else: 409 return '<%s at remote 0x%x>' % (self.cl_name, 410 self.address) 411 412def _PyObject_VAR_SIZE(typeobj, nitems): 413 return ( ( typeobj.field('tp_basicsize') + 414 nitems * typeobj.field('tp_itemsize') + 415 (SIZEOF_VOID_P - 1) 416 ) & ~(SIZEOF_VOID_P - 1) 417 ).cast(_type_size_t) 418 419class 
HeapTypeObjectPtr(PyObjectPtr): 420 _typename = 'PyObject' 421 422 def get_attr_dict(self): 423 ''' 424 Get the PyDictObject ptr representing the attribute dictionary 425 (or None if there's a problem) 426 ''' 427 try: 428 typeobj = self.type() 429 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 430 if dictoffset != 0: 431 if dictoffset < 0: 432 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 433 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 434 if tsize < 0: 435 tsize = -tsize 436 size = _PyObject_VAR_SIZE(typeobj, tsize) 437 dictoffset += size 438 assert dictoffset > 0 439 assert dictoffset % SIZEOF_VOID_P == 0 440 441 dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset 442 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 443 dictptr = dictptr.cast(PyObjectPtrPtr) 444 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 445 except RuntimeError: 446 # Corrupt data somewhere; fail safe 447 pass 448 449 # Not found, or some kind of error: 450 return None 451 452 def proxyval(self, visited): 453 ''' 454 Support for new-style classes. 
455 456 Currently we just locate the dictionary using a transliteration to 457 python of _PyObject_GetDictPtr, ignoring descriptors 458 ''' 459 # Guard against infinite loops: 460 if self.as_address() in visited: 461 return ProxyAlreadyVisited('<...>') 462 visited.add(self.as_address()) 463 464 pyop_attr_dict = self.get_attr_dict() 465 if pyop_attr_dict: 466 attr_dict = pyop_attr_dict.proxyval(visited) 467 else: 468 attr_dict = {} 469 tp_name = self.safe_tp_name() 470 471 # New-style class: 472 return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) 473 474 def write_repr(self, out, visited): 475 # Guard against infinite loops: 476 if self.as_address() in visited: 477 out.write('<...>') 478 return 479 visited.add(self.as_address()) 480 481 pyop_attrdict = self.get_attr_dict() 482 _write_instance_repr(out, visited, 483 self.safe_tp_name(), pyop_attrdict, self.as_address()) 484 485class ProxyException(Exception): 486 def __init__(self, tp_name, args): 487 self.tp_name = tp_name 488 self.args = args 489 490 def __repr__(self): 491 return '%s%r' % (self.tp_name, self.args) 492 493class PyBaseExceptionObjectPtr(PyObjectPtr): 494 """ 495 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 496 within the process being debugged. 
497 """ 498 _typename = 'PyBaseExceptionObject' 499 500 def proxyval(self, visited): 501 # Guard against infinite loops: 502 if self.as_address() in visited: 503 return ProxyAlreadyVisited('(...)') 504 visited.add(self.as_address()) 505 arg_proxy = self.pyop_field('args').proxyval(visited) 506 return ProxyException(self.safe_tp_name(), 507 arg_proxy) 508 509 def write_repr(self, out, visited): 510 # Guard against infinite loops: 511 if self.as_address() in visited: 512 out.write('(...)') 513 return 514 visited.add(self.as_address()) 515 516 out.write(self.safe_tp_name()) 517 self.write_field_repr('args', out, visited) 518 519class PyBoolObjectPtr(PyObjectPtr): 520 """ 521 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 522 <bool> instances (Py_True/Py_False) within the process being debugged. 523 """ 524 _typename = 'PyBoolObject' 525 526 def proxyval(self, visited): 527 if int_from_int(self.field('ob_ival')): 528 return True 529 else: 530 return False 531 532 533class PyClassObjectPtr(PyObjectPtr): 534 """ 535 Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> 536 instance within the process being debugged. 
537 """ 538 _typename = 'PyClassObject' 539 540 541class BuiltInFunctionProxy(object): 542 def __init__(self, ml_name): 543 self.ml_name = ml_name 544 545 def __repr__(self): 546 return "<built-in function %s>" % self.ml_name 547 548class BuiltInMethodProxy(object): 549 def __init__(self, ml_name, pyop_m_self): 550 self.ml_name = ml_name 551 self.pyop_m_self = pyop_m_self 552 553 def __repr__(self): 554 return ('<built-in method %s of %s object at remote 0x%x>' 555 % (self.ml_name, 556 self.pyop_m_self.safe_tp_name(), 557 self.pyop_m_self.as_address()) 558 ) 559 560class PyCFunctionObjectPtr(PyObjectPtr): 561 """ 562 Class wrapping a gdb.Value that's a PyCFunctionObject* 563 (see Include/methodobject.h and Objects/methodobject.c) 564 """ 565 _typename = 'PyCFunctionObject' 566 567 def proxyval(self, visited): 568 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 569 ml_name = m_ml['ml_name'].string() 570 571 pyop_m_self = self.pyop_field('m_self') 572 if pyop_m_self.is_null(): 573 return BuiltInFunctionProxy(ml_name) 574 else: 575 return BuiltInMethodProxy(ml_name, pyop_m_self) 576 577 578class PyCodeObjectPtr(PyObjectPtr): 579 """ 580 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 581 within the process being debugged. 
582 """ 583 _typename = 'PyCodeObject' 584 585 def addr2line(self, addrq): 586 ''' 587 Get the line number for a given bytecode offset 588 589 Analogous to PyCode_Addr2Line; translated from pseudocode in 590 Objects/lnotab_notes.txt 591 ''' 592 co_lnotab = self.pyop_field('co_lnotab').proxyval(set()) 593 594 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 595 # not 0, as lnotab_notes.txt has it: 596 lineno = int_from_int(self.field('co_firstlineno')) 597 598 addr = 0 599 for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]): 600 addr += ord(addr_incr) 601 if addr > addrq: 602 return lineno 603 lineno += ord(line_incr) 604 return lineno 605 606 607class PyDictObjectPtr(PyObjectPtr): 608 """ 609 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 610 within the process being debugged. 611 """ 612 _typename = 'PyDictObject' 613 614 def iteritems(self): 615 ''' 616 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 617 analagous to dict.iteritems() 618 ''' 619 for i in safe_range(self.field('ma_mask') + 1): 620 ep = self.field('ma_table') + i 621 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 622 if not pyop_value.is_null(): 623 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 624 yield (pyop_key, pyop_value) 625 626 def proxyval(self, visited): 627 # Guard against infinite loops: 628 if self.as_address() in visited: 629 return ProxyAlreadyVisited('{...}') 630 visited.add(self.as_address()) 631 632 result = {} 633 for pyop_key, pyop_value in self.iteritems(): 634 proxy_key = pyop_key.proxyval(visited) 635 proxy_value = pyop_value.proxyval(visited) 636 result[proxy_key] = proxy_value 637 return result 638 639 def write_repr(self, out, visited): 640 # Guard against infinite loops: 641 if self.as_address() in visited: 642 out.write('{...}') 643 return 644 visited.add(self.as_address()) 645 646 out.write('{') 647 first = True 648 for pyop_key, pyop_value in self.iteritems(): 649 if not first: 650 
out.write(', ') 651 first = False 652 pyop_key.write_repr(out, visited) 653 out.write(': ') 654 pyop_value.write_repr(out, visited) 655 out.write('}') 656 657class PyInstanceObjectPtr(PyObjectPtr): 658 _typename = 'PyInstanceObject' 659 660 def proxyval(self, visited): 661 # Guard against infinite loops: 662 if self.as_address() in visited: 663 return ProxyAlreadyVisited('<...>') 664 visited.add(self.as_address()) 665 666 # Get name of class: 667 in_class = self.pyop_field('in_class') 668 cl_name = in_class.pyop_field('cl_name').proxyval(visited) 669 670 # Get dictionary of instance attributes: 671 in_dict = self.pyop_field('in_dict').proxyval(visited) 672 673 # Old-style class: 674 return InstanceProxy(cl_name, in_dict, long(self._gdbval)) 675 676 def write_repr(self, out, visited): 677 # Guard against infinite loops: 678 if self.as_address() in visited: 679 out.write('<...>') 680 return 681 visited.add(self.as_address()) 682 683 # Old-style class: 684 685 # Get name of class: 686 in_class = self.pyop_field('in_class') 687 cl_name = in_class.pyop_field('cl_name').proxyval(visited) 688 689 # Get dictionary of instance attributes: 690 pyop_in_dict = self.pyop_field('in_dict') 691 692 _write_instance_repr(out, visited, 693 cl_name, pyop_in_dict, self.as_address()) 694 695class PyIntObjectPtr(PyObjectPtr): 696 _typename = 'PyIntObject' 697 698 def proxyval(self, visited): 699 result = int_from_int(self.field('ob_ival')) 700 return result 701 702class PyListObjectPtr(PyObjectPtr): 703 _typename = 'PyListObject' 704 705 def __getitem__(self, i): 706 # Get the gdb.Value for the (PyObject*) with the given index: 707 field_ob_item = self.field('ob_item') 708 return field_ob_item[i] 709 710 def proxyval(self, visited): 711 # Guard against infinite loops: 712 if self.as_address() in visited: 713 return ProxyAlreadyVisited('[...]') 714 visited.add(self.as_address()) 715 716 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 717 for i in 
safe_range(int_from_int(self.field('ob_size')))] 718 return result 719 720 def write_repr(self, out, visited): 721 # Guard against infinite loops: 722 if self.as_address() in visited: 723 out.write('[...]') 724 return 725 visited.add(self.as_address()) 726 727 out.write('[') 728 for i in safe_range(int_from_int(self.field('ob_size'))): 729 if i > 0: 730 out.write(', ') 731 element = PyObjectPtr.from_pyobject_ptr(self[i]) 732 element.write_repr(out, visited) 733 out.write(']') 734 735class PyLongObjectPtr(PyObjectPtr): 736 _typename = 'PyLongObject' 737 738 def proxyval(self, visited): 739 ''' 740 Python's Include/longobjrep.h has this declaration: 741 struct _longobject { 742 PyObject_VAR_HEAD 743 digit ob_digit[1]; 744 }; 745 746 with this description: 747 The absolute value of a number is equal to 748 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 749 Negative numbers are represented with ob_size < 0; 750 zero is represented by ob_size == 0. 751 752 where SHIFT can be either: 753 #define PyLong_SHIFT 30 754 #define PyLong_SHIFT 15 755 ''' 756 ob_size = long(self.field('ob_size')) 757 if ob_size == 0: 758 return 0L 759 760 ob_digit = self.field('ob_digit') 761 762 if gdb.lookup_type('digit').sizeof == 2: 763 SHIFT = 15L 764 else: 765 SHIFT = 30L 766 767 digits = [long(ob_digit[i]) * 2**(SHIFT*i) 768 for i in safe_range(abs(ob_size))] 769 result = sum(digits) 770 if ob_size < 0: 771 result = -result 772 return result 773 774 775class PyNoneStructPtr(PyObjectPtr): 776 """ 777 Class wrapping a gdb.Value that's a PyObject* pointing to the 778 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 779 """ 780 _typename = 'PyObject' 781 782 def proxyval(self, visited): 783 return None 784 785 786class PyFrameObjectPtr(PyObjectPtr): 787 _typename = 'PyFrameObject' 788 789 def __init__(self, gdbval, cast_to): 790 PyObjectPtr.__init__(self, gdbval, cast_to) 791 792 if not self.is_optimized_out(): 793 self.co = 
PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) 794 self.co_name = self.co.pyop_field('co_name') 795 self.co_filename = self.co.pyop_field('co_filename') 796 797 self.f_lineno = int_from_int(self.field('f_lineno')) 798 self.f_lasti = int_from_int(self.field('f_lasti')) 799 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 800 self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) 801 802 def iter_locals(self): 803 ''' 804 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 805 the local variables of this frame 806 ''' 807 if self.is_optimized_out(): 808 return 809 810 f_localsplus = self.field('f_localsplus') 811 for i in safe_range(self.co_nlocals): 812 pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) 813 if not pyop_value.is_null(): 814 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) 815 yield (pyop_name, pyop_value) 816 817 def iter_globals(self): 818 ''' 819 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 820 the global variables of this frame 821 ''' 822 if self.is_optimized_out(): 823 return 824 825 pyop_globals = self.pyop_field('f_globals') 826 return pyop_globals.iteritems() 827 828 def iter_builtins(self): 829 ''' 830 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 831 the builtin variables 832 ''' 833 if self.is_optimized_out(): 834 return 835 836 pyop_builtins = self.pyop_field('f_builtins') 837 return pyop_builtins.iteritems() 838 839 def get_var_by_name(self, name): 840 ''' 841 Look for the named local variable, returning a (PyObjectPtr, scope) pair 842 where scope is a string 'local', 'global', 'builtin' 843 844 If not found, return (None, None) 845 ''' 846 for pyop_name, pyop_value in self.iter_locals(): 847 if name == pyop_name.proxyval(set()): 848 return pyop_value, 'local' 849 for pyop_name, pyop_value in self.iter_globals(): 850 if name == pyop_name.proxyval(set()): 851 return pyop_value, 'global' 852 for pyop_name, 
pyop_value in self.iter_builtins(): 853 if name == pyop_name.proxyval(set()): 854 return pyop_value, 'builtin' 855 return None, None 856 857 def filename(self): 858 '''Get the path of the current Python source file, as a string''' 859 if self.is_optimized_out(): 860 return '(frame information optimized out)' 861 return self.co_filename.proxyval(set()) 862 863 def current_line_num(self): 864 '''Get current line number as an integer (1-based) 865 866 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 867 868 See Objects/lnotab_notes.txt 869 ''' 870 if self.is_optimized_out(): 871 return None 872 f_trace = self.field('f_trace') 873 if long(f_trace) != 0: 874 # we have a non-NULL f_trace: 875 return self.f_lineno 876 else: 877 #try: 878 return self.co.addr2line(self.f_lasti) 879 #except ValueError: 880 # return self.f_lineno 881 882 def current_line(self): 883 '''Get the text of the current source line as a string, with a trailing 884 newline character''' 885 if self.is_optimized_out(): 886 return '(frame information optimized out)' 887 with open(self.filename(), 'r') as f: 888 all_lines = f.readlines() 889 # Convert from 1-based current_line_num to 0-based list offset: 890 return all_lines[self.current_line_num()-1] 891 892 def write_repr(self, out, visited): 893 if self.is_optimized_out(): 894 out.write('(frame information optimized out)') 895 return 896 out.write('Frame 0x%x, for file %s, line %i, in %s (' 897 % (self.as_address(), 898 self.co_filename, 899 self.current_line_num(), 900 self.co_name)) 901 first = True 902 for pyop_name, pyop_value in self.iter_locals(): 903 if not first: 904 out.write(', ') 905 first = False 906 907 out.write(pyop_name.proxyval(visited)) 908 out.write('=') 909 pyop_value.write_repr(out, visited) 910 911 out.write(')') 912 913class PySetObjectPtr(PyObjectPtr): 914 _typename = 'PySetObject' 915 916 def proxyval(self, visited): 917 # Guard against infinite loops: 918 if self.as_address() in visited: 919 return 
ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) 920 visited.add(self.as_address()) 921 922 members = [] 923 table = self.field('table') 924 for i in safe_range(self.field('mask')+1): 925 setentry = table[i] 926 key = setentry['key'] 927 if key != 0: 928 key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited) 929 if key_proxy != '<dummy key>': 930 members.append(key_proxy) 931 if self.safe_tp_name() == 'frozenset': 932 return frozenset(members) 933 else: 934 return set(members) 935 936 def write_repr(self, out, visited): 937 out.write(self.safe_tp_name()) 938 939 # Guard against infinite loops: 940 if self.as_address() in visited: 941 out.write('(...)') 942 return 943 visited.add(self.as_address()) 944 945 out.write('([') 946 first = True 947 table = self.field('table') 948 for i in safe_range(self.field('mask')+1): 949 setentry = table[i] 950 key = setentry['key'] 951 if key != 0: 952 pyop_key = PyObjectPtr.from_pyobject_ptr(key) 953 key_proxy = pyop_key.proxyval(visited) # FIXME! 
954 if key_proxy != '<dummy key>': 955 if not first: 956 out.write(', ') 957 first = False 958 pyop_key.write_repr(out, visited) 959 out.write('])') 960 961 962class PyStringObjectPtr(PyObjectPtr): 963 _typename = 'PyStringObject' 964 965 def __str__(self): 966 field_ob_size = self.field('ob_size') 967 field_ob_sval = self.field('ob_sval') 968 char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr) 969 return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) 970 971 def proxyval(self, visited): 972 return str(self) 973 974class PyTupleObjectPtr(PyObjectPtr): 975 _typename = 'PyTupleObject' 976 977 def __getitem__(self, i): 978 # Get the gdb.Value for the (PyObject*) with the given index: 979 field_ob_item = self.field('ob_item') 980 return field_ob_item[i] 981 982 def proxyval(self, visited): 983 # Guard against infinite loops: 984 if self.as_address() in visited: 985 return ProxyAlreadyVisited('(...)') 986 visited.add(self.as_address()) 987 988 result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 989 for i in safe_range(int_from_int(self.field('ob_size')))]) 990 return result 991 992 def write_repr(self, out, visited): 993 # Guard against infinite loops: 994 if self.as_address() in visited: 995 out.write('(...)') 996 return 997 visited.add(self.as_address()) 998 999 out.write('(') 1000 for i in safe_range(int_from_int(self.field('ob_size'))): 1001 if i > 0: 1002 out.write(', ') 1003 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1004 element.write_repr(out, visited) 1005 if self.field('ob_size') == 1: 1006 out.write(',)') 1007 else: 1008 out.write(')') 1009 1010class PyTypeObjectPtr(PyObjectPtr): 1011 _typename = 'PyTypeObject' 1012 1013 1014if sys.maxunicode >= 0x10000: 1015 _unichr = unichr 1016else: 1017 # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb 1018 def _unichr(x): 1019 if x < 0x10000: 1020 return unichr(x) 1021 x -= 0x10000 1022 ch1 = 0xD800 | (x >> 10) 1023 ch2 = 0xDC00 | (x & 0x3FF) 
1024 return unichr(ch1) + unichr(ch2) 1025 1026class PyUnicodeObjectPtr(PyObjectPtr): 1027 _typename = 'PyUnicodeObject' 1028 1029 def char_width(self): 1030 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1031 return _type_Py_UNICODE.sizeof 1032 1033 def proxyval(self, visited): 1034 # From unicodeobject.h: 1035 # Py_ssize_t length; /* Length of raw Unicode data in buffer */ 1036 # Py_UNICODE *str; /* Raw Unicode buffer */ 1037 field_length = long(self.field('length')) 1038 field_str = self.field('str') 1039 1040 # Gather a list of ints from the Py_UNICODE array; these are either 1041 # UCS-2 or UCS-4 code points: 1042 if self.char_width() > 2: 1043 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1044 else: 1045 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1046 # inferior process: we must join surrogate pairs. 1047 Py_UNICODEs = [] 1048 i = 0 1049 limit = safety_limit(field_length) 1050 while i < limit: 1051 ucs = int(field_str[i]) 1052 i += 1 1053 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1054 Py_UNICODEs.append(ucs) 1055 continue 1056 # This could be a surrogate pair. 1057 ucs2 = int(field_str[i]) 1058 if ucs2 < 0xDC00 or ucs2 > 0xDFFF: 1059 continue 1060 code = (ucs & 0x03FF) << 10 1061 code |= ucs2 & 0x03FF 1062 code += 0x00010000 1063 Py_UNICODEs.append(code) 1064 i += 1 1065 1066 # Convert the int code points to unicode characters, and generate a 1067 # local unicode instance. 1068 # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). 
1069 result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs]) 1070 return result 1071 1072 1073def int_from_int(gdbval): 1074 return int(str(gdbval)) 1075 1076 1077def stringify(val): 1078 # TODO: repr() puts everything on one line; pformat can be nicer, but 1079 # can lead to v.long results; this function isolates the choice 1080 if True: 1081 return repr(val) 1082 else: 1083 from pprint import pformat 1084 return pformat(val) 1085 1086 1087class PyObjectPtrPrinter: 1088 "Prints a (PyObject*)" 1089 1090 def __init__ (self, gdbval): 1091 self.gdbval = gdbval 1092 1093 def to_string (self): 1094 pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) 1095 if True: 1096 return pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1097 else: 1098 # Generate full proxy value then stringify it. 1099 # Doing so could be expensive 1100 proxyval = pyop.proxyval(set()) 1101 return stringify(proxyval) 1102 1103def pretty_printer_lookup(gdbval): 1104 type = gdbval.type.unqualified() 1105 if type.code == gdb.TYPE_CODE_PTR: 1106 type = type.target().unqualified() 1107 t = str(type) 1108 if t in ("PyObject", "PyFrameObject"): 1109 return PyObjectPtrPrinter(gdbval) 1110 1111""" 1112During development, I've been manually invoking the code in this way: 1113(gdb) python 1114 1115import sys 1116sys.path.append('/home/david/coding/python-gdb') 1117import libpython 1118end 1119 1120then reloading it after each edit like this: 1121(gdb) python reload(libpython) 1122 1123The following code should ensure that the prettyprinter is registered 1124if the code is autoloaded by gdb when visiting libpython.so, provided 1125that this python file is installed to the same path as the library (or its 1126.debug file) plus a "-gdb.py" suffix, e.g: 1127 /usr/lib/libpython2.6.so.1.0-gdb.py 1128 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py 1129""" 1130def register (obj): 1131 if obj == None: 1132 obj = gdb 1133 1134 # Wire up the pretty-printer 1135 obj.pretty_printers.append(pretty_printer_lookup) 1136 
register (gdb.current_objfile ())



# Unfortunately, the exact API exposed by the gdb module varies somewhat
# from build to build
# See http://bugs.python.org/issue8279?#msg102276

class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame for the next-older gdb frame, or None.'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame for the next-newer gdb frame, or None.'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking for each newer
        # frame only once per step:
        newer_frame = self.newer()
        while newer_frame:
            index += 1
            newer_frame = newer_frame.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() == 'PyEval_EvalFrameEx':
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a PyEval_EvalFrameEx frame:
                return True

        return False

    def get_pyop(self):
        '''Return a wrapper for this frame's "f" variable, or None.

        None is returned when reading the variable raises ValueError.
        '''
        try:
            f = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(f)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None.'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        frame = cls.get_selected_frame()

        while frame:
            if frame.is_evalframeex():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a summary of this frame, prefixed by its index, to stdout.'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
                sys.stdout.write(pyop.current_line())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            sys.stdout.write('#%i\n' % self.get_index())


class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "py-list START": show START and the ten lines after it
        m = re.match(r'\s*(\d+)\s*', args)
        if m:
            # Parse the captured digits, not the whole match
            start = int(m.group(1))
            end = start + 10

        # "py-list START, END": takes precedence over the form above
        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if m:
            start, end = map(int, m.groups())

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        if start is None:
            # No range given: centre the listing on the current line
            start = lineno - 5
            end = lineno + 5

        if start<1:
            start = 1

        # The source file may be missing or unreadable on the machine running
        # gdb; report that instead of letting a traceback escape.
        try:
            f = open(filename, 'r')
        except IOError:
            # sys.exc_info() keeps this valid on every Python version that
            # can run this file.
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, sys.exc_info()[1]))
            return
        with f:
            all_lines = f.readlines()

        # start and end are 1-based, all_lines is 0-based;
        # so [start-1:end] as a python slice gives us [start, end] as a
        # closed interval
        for i, line in enumerate(all_lines[start-1:end]):
            linestr = str(i+start)
            # Highlight current line:
            if i + start == lineno:
                linestr = '>' + linestr
            sys.stdout.write('%4s %s' % (linestr, line))


# ...and register the command:
PyList()

def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    frame = Frame.get_selected_python_frame()
    while frame:
        if move_up:
            iter_frame = frame.older()
        else:
            iter_frame = frame.newer()

        if not iter_frame:
            break

        if iter_frame.is_evalframeex():
            # Result:
            if iter_frame.select():
                iter_frame.print_summary()
            # select() reports its own failure, so stop searching either way
            return

        frame = iter_frame

    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')

class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-up",
                              gdb.COMMAND_STACK,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        move_in_stack(move_up=True)

class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-down",
                              gdb.COMMAND_STACK,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        move_in_stack(move_up=False)

# Not all builds of gdb have gdb.Frame.select
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()

class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-bt",
                              gdb.COMMAND_STACK,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        frame = Frame.get_selected_python_frame()
        if not frame:
            # Say so, as py-list and py-print do, rather than print nothing
            print('Unable to locate python frame')
            return

        while frame:
            if frame.is_evalframeex():
                frame.print_summary()
            frame = frame.older()

PyBacktrace()

class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-print",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # get_var_by_name gives back the variable together with a label for
        # the scope it was found in
        pyop_var, scope = pyop_frame.get_var_by_name(name)

        if pyop_var:
            print ('%s %r = %s'
                   % (scope,
                      name,
                      pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))
        else:
            print('%r not found' % name)

PyPrint()

class PyLocals(gdb.Command):
    'Print the name and value of each local variable in the selected python frame'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # This command takes no arguments; "args" is ignored.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local; each value's repr is cut off at
        # MAX_OUTPUT_LEN
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()