1#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types.  Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython.  In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process.  For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
22holding three PyObject* that turn out to be PyStringObject* instances, we can
23generate a proxy value within the gdb process that is a list of strings:
24  ["foo", "bar", "baz"]
25
26Doing so can be expensive for complicated graphs of objects, and could take
27some time, so we also have a "write_repr" method that writes a representation
28of the data to a file-like object.  This allows us to stop the traversal by
29having the file-like object raise an exception if it gets too much data.
30
31With both "proxyval" and "write_repr" we keep track of the set of all addresses
32visited so far in the traversal, to avoid infinite recursion due to cycles in
33the graph of object references.
34
35We try to defer gdb.lookup_type() invocations for python types until as late as
36possible: for a dynamically linked python binary, when the process starts in
37the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
38the type names are known to the debugger
39
40The module also extends gdb with some python-specific commands.
41'''
42from __future__ import with_statement
43import gdb
44import sys
45
# Look up the gdb.Type for some standard C types.  These lookups happen at
# import time; lookups of python-specific types are deferred (see the module
# docstring above).
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer() # void*
_type_size_t = gdb.lookup_type('size_t')

# Size of a (void*) as reported by gdb; used as the alignment unit when
# rounding object sizes and validating offsets
SIZEOF_VOID_P = _type_void_ptr.sizeof


# Bit masks that are tested against the "tp_flags" field of a type object
# (see PyObjectPtr.subclass_from_type) in order to choose a wrapper class:
Py_TPFLAGS_HEAPTYPE = (1L << 9)

Py_TPFLAGS_INT_SUBCLASS      = (1L << 23)
Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1L << 31)


# NOTE(review): not referenced in this part of the file; presumably the
# "maxlen" handed to get_truncated_repr by callers -- confirm against usage
MAX_OUTPUT_LEN=1024
69
class NullPyObjectPtr(RuntimeError):
    '''Raised when a field is requested from a NULL (PyObject*)'''
72
73
def safety_limit(val):
    '''
    Clamp an integer value read from the process being debugged to at most
    1000, so that corrupt data within said process (e.g. a bogus list size)
    cannot make the gdb process iterate for an unreasonable amount of time.

    Values that are already within the threshold are returned unchanged.
    '''
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
79
80
def safe_range(val):
    '''
    Like xrange(val), but the upper bound is first passed through
    safety_limit(), since the value comes from the inferior process and may
    have been corrupted.
    '''
    capped = safety_limit(val)
    return xrange(capped)
85
86
class StringTruncated(RuntimeError):
    '''Raised by TruncatedStringIO.write once the output limit is exceeded'''
89
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer, along the lines of cStringIO, that can
    cap the amount of data it accepts.

    When "maxlen" is set (and non-zero), a write that would take the buffer
    beyond that length stores only as much of the data as still fits and
    then raises StringTruncated.
    '''
    def __init__(self, maxlen=None):
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''Append "data", raising StringTruncated if it does not all fit'''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Keep the part that fits, then tell the caller to stop:
            room = limit - len(self._val)
            self._val += data[0:room]
            raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far, as a single string'''
        return self._val
108
109class PyObjectPtr(object):
110    """
111    Class wrapping a gdb.Value that's a either a (PyObject*) within the
112    inferior process, or some subclass pointer e.g. (PyStringObject*)
113
114    There will be a subclass for every refined PyObject type that we care
115    about.
116
117    Note that at every stage the underlying pointer could be NULL, point
118    to corrupt data, etc; this is the debugger, after all.
119    """
120    _typename = 'PyObject'
121
122    def __init__(self, gdbval, cast_to=None):
123        if cast_to:
124            self._gdbval = gdbval.cast(cast_to)
125        else:
126            self._gdbval = gdbval
127
128    def field(self, name):
129        '''
130        Get the gdb.Value for the given field within the PyObject, coping with
131        some python 2 versus python 3 differences.
132
133        Various libpython types are defined using the "PyObject_HEAD" and
134        "PyObject_VAR_HEAD" macros.
135
136        In Python 2, this these are defined so that "ob_type" and (for a var
137        object) "ob_size" are fields of the type in question.
138
139        In Python 3, this is defined as an embedded PyVarObject type thus:
140           PyVarObject ob_base;
141        so that the "ob_size" field is located insize the "ob_base" field, and
142        the "ob_type" is most easily accessed by casting back to a (PyObject*).
143        '''
144        if self.is_null():
145            raise NullPyObjectPtr(self)
146
147        if name == 'ob_type':
148            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
149            return pyo_ptr.dereference()[name]
150
151        if name == 'ob_size':
152            try:
153            # Python 2:
154                return self._gdbval.dereference()[name]
155            except RuntimeError:
156                # Python 3:
157                return self._gdbval.dereference()['ob_base'][name]
158
159        # General case: look it up inside the object:
160        return self._gdbval.dereference()[name]
161
162    def pyop_field(self, name):
163        '''
164        Get a PyObjectPtr for the given PyObject* field within this PyObject,
165        coping with some python 2 versus python 3 differences.
166        '''
167        return PyObjectPtr.from_pyobject_ptr(self.field(name))
168
169    def write_field_repr(self, name, out, visited):
170        '''
171        Extract the PyObject* field named "name", and write its representation
172        to file-like object "out"
173        '''
174        field_obj = self.pyop_field(name)
175        field_obj.write_repr(out, visited)
176
177    def get_truncated_repr(self, maxlen):
178        '''
179        Get a repr-like string for the data, but truncate it at "maxlen" bytes
180        (ending the object graph traversal as soon as you do)
181        '''
182        out = TruncatedStringIO(maxlen)
183        try:
184            self.write_repr(out, set())
185        except StringTruncated:
186            # Truncation occurred:
187            return out.getvalue() + '...(truncated)'
188
189        # No truncation occurred:
190        return out.getvalue()
191
192    def type(self):
193        return PyTypeObjectPtr(self.field('ob_type'))
194
195    def is_null(self):
196        return 0 == long(self._gdbval)
197
198    def is_optimized_out(self):
199        '''
200        Is the value of the underlying PyObject* visible to the debugger?
201
202        This can vary with the precise version of the compiler used to build
203        Python, and the precise version of gdb.
204
205        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
206        PyEval_EvalFrameEx's "f"
207        '''
208        return self._gdbval.is_optimized_out
209
210    def safe_tp_name(self):
211        try:
212            return self.type().field('tp_name').string()
213        except NullPyObjectPtr:
214            # NULL tp_name?
215            return 'unknown'
216        except RuntimeError:
217            # Can't even read the object at all?
218            return 'unknown'
219
220    def proxyval(self, visited):
221        '''
222        Scrape a value from the inferior process, and try to represent it
223        within the gdb process, whilst (hopefully) avoiding crashes when
224        the remote data is corrupt.
225
226        Derived classes will override this.
227
228        For example, a PyIntObject* with ob_ival 42 in the inferior process
229        should result in an int(42) in this process.
230
231        visited: a set of all gdb.Value pyobject pointers already visited
232        whilst generating this value (to guard against infinite recursion when
233        visiting object graphs with loops).  Analogous to Py_ReprEnter and
234        Py_ReprLeave
235        '''
236
237        class FakeRepr(object):
238            """
239            Class representing a non-descript PyObject* value in the inferior
240            process for when we don't have a custom scraper, intended to have
241            a sane repr().
242            """
243
244            def __init__(self, tp_name, address):
245                self.tp_name = tp_name
246                self.address = address
247
248            def __repr__(self):
249                # For the NULL pointer, we have no way of knowing a type, so
250                # special-case it as per
251                # http://bugs.python.org/issue8032#msg100882
252                if self.address == 0:
253                    return '0x0'
254                return '<%s at remote 0x%x>' % (self.tp_name, self.address)
255
256        return FakeRepr(self.safe_tp_name(),
257                        long(self._gdbval))
258
259    def write_repr(self, out, visited):
260        '''
261        Write a string representation of the value scraped from the inferior
262        process to "out", a file-like object.
263        '''
264        # Default implementation: generate a proxy value and write its repr
265        # However, this could involve a lot of work for complicated objects,
266        # so for derived classes we specialize this
267        return out.write(repr(self.proxyval(visited)))
268
269    @classmethod
270    def subclass_from_type(cls, t):
271        '''
272        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
273        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
274        to use
275
276        Ideally, we would look up the symbols for the global types, but that
277        isn't working yet:
278          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
279          Traceback (most recent call last):
280            File "<string>", line 1, in <module>
281          NotImplementedError: Symbol type not yet supported in Python scripts.
282          Error while executing Python code.
283
284        For now, we use tp_flags, after doing some string comparisons on the
285        tp_name for some special-cases that don't seem to be visible through
286        flags
287        '''
288        try:
289            tp_name = t.field('tp_name').string()
290            tp_flags = int(t.field('tp_flags'))
291        except RuntimeError:
292            # Handle any kind of error e.g. NULL ptrs by simply using the base
293            # class
294            return cls
295
296        #print 'tp_flags = 0x%08x' % tp_flags
297        #print 'tp_name = %r' % tp_name
298
299        name_map = {'bool': PyBoolObjectPtr,
300                    'classobj': PyClassObjectPtr,
301                    'instance': PyInstanceObjectPtr,
302                    'NoneType': PyNoneStructPtr,
303                    'frame': PyFrameObjectPtr,
304                    'set' : PySetObjectPtr,
305                    'frozenset' : PySetObjectPtr,
306                    'builtin_function_or_method' : PyCFunctionObjectPtr,
307                    }
308        if tp_name in name_map:
309            return name_map[tp_name]
310
311        if tp_flags & Py_TPFLAGS_HEAPTYPE:
312            return HeapTypeObjectPtr
313
314        if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
315            return PyIntObjectPtr
316        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
317            return PyLongObjectPtr
318        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
319            return PyListObjectPtr
320        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
321            return PyTupleObjectPtr
322        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
323            return PyStringObjectPtr
324        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
325            return PyUnicodeObjectPtr
326        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
327            return PyDictObjectPtr
328        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
329            return PyBaseExceptionObjectPtr
330        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
331        #    return PyTypeObjectPtr
332
333        # Use the base class:
334        return cls
335
336    @classmethod
337    def from_pyobject_ptr(cls, gdbval):
338        '''
339        Try to locate the appropriate derived class dynamically, and cast
340        the pointer accordingly.
341        '''
342        try:
343            p = PyObjectPtr(gdbval)
344            cls = cls.subclass_from_type(p.type())
345            return cls(gdbval, cast_to=cls.get_gdb_type())
346        except RuntimeError:
347            # Handle any kind of error e.g. NULL ptrs by simply using the base
348            # class
349            pass
350        return cls(gdbval)
351
352    @classmethod
353    def get_gdb_type(cls):
354        return gdb.lookup_type(cls._typename).pointer()
355
356    def as_address(self):
357        return long(self._gdbval)
358
359
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy value returned when an object is reached a second time
    during a traversal, so that loops in the object graph do not cause
    infinite recursion.  Its repr() is simply the string it was given.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''
    def __init__(self, rep):
        # The exact text to show in place of the already-visited object
        self._rep = rep

    def __repr__(self):
        placeholder = self._rep
        return placeholder
372
373
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''
    Helper shared by the old-style and new-style instance wrappers: write
    "<NAME(attr=value, ...) at remote 0xADDRESS>" to file-like object "out".

    The parenthesised attribute list is only written when pyop_attrdict is
    a PyDictObjectPtr; otherwise the output is "<NAME at remote 0xADDRESS>".
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                # Separator before every attribute except the first
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
393
394
class InstanceProxy(object):
    '''
    Proxy value for a class instance in the inferior process: records the
    class name, the scraped attribute dictionary and the remote address, and
    has a repr() of the form "<NAME(attr=value, ...) at remote 0xADDRESS>".
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if not isinstance(self.attrdict, dict):
            # No usable attribute dictionary: omit the attribute list
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        kwargs = ', '.join(pairs)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
411
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type object
    wrapper, rounded up to a multiple of SIZEOF_VOID_P, and cast the result
    to size_t.
    '''
    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    # Clearing the low bits rounds down to the alignment boundary; having
    # added the mask beforehand, the net effect is rounding up.
    aligned = unaligned & ~align_mask
    return aligned.cast(_type_size_t)
418
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a (PyObject*) whose type object has
    Py_TPFLAGS_HEAPTYPE set, i.e. (per proxyval) an instance of a new-style
    class.
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)

        The dictionary is located via the type's tp_dictoffset.  A negative
        offset is relative to the end of the (variable-sized) object, so the
        object's size is computed first.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the inferior process and may be
                    # corrupt: fail safe rather than asserting, since an
                    # AssertionError would escape the handler below.
                    if dictoffset <= 0 or dictoffset % SIZEOF_VOID_P != 0:
                        return None

                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for new-style classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited placeholder if
        this object was already seen during the current traversal.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # New-style class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<TYPENAME(attr=value, ...) at remote 0xADDRESS>" to file-like
        object "out", or "<...>" if this object was already visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
484
class ProxyException(Exception):
    '''
    Proxy value for an exception instance in the inferior process: holds the
    name of the exception's type and the proxy of its "args", and has a
    repr() of the form NAME(arg, ...).
    '''
    def __init__(self, tp_name, args):
        self.args = args
        self.tp_name = tp_name

    def __repr__(self):
        pieces = (self.tp_name, self.args)
        return '%s%r' % pieces
492
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyBaseExceptionObject*, i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Return a ProxyException built from the type name and the proxy of
        the "args" field, or a placeholder if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        args_proxy = self.pyop_field('args').proxyval(visited)
        tp_name = self.safe_tp_name()
        return ProxyException(tp_name, args_proxy)

    def write_repr(self, out, visited):
        '''
        Write the type name immediately followed by the repr of the "args"
        field to "out", or "(...)" if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
518
class PyBoolObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) in the process being debugged.
    """
    _typename = 'PyBoolObject'

    def proxyval(self, visited):
        '''Return True or False according to the truth of "ob_ival"'''
        return bool(int_from_int(self.field('ob_ival')))
531
532
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyClassObject*, i.e. a <classobj>
    instance in the process being debugged.  It only supplies the struct
    name; all behaviour comes from PyObjectPtr.
    """
    _typename = 'PyClassObject'
539
540
class BuiltInFunctionProxy(object):
    '''Proxy whose repr() is "<built-in function NAME>"'''
    def __init__(self, ml_name):
        # Name of the function, as scraped from its method definition
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
547
class BuiltInMethodProxy(object):
    '''
    Proxy whose repr() is
    "<built-in method NAME of TYPE object at remote 0xADDRESS>", where TYPE
    and ADDRESS are obtained from the wrapper of the bound object.
    '''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        details = (self.ml_name,
                   bound_to.safe_tp_name(),
                   bound_to.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
559
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when "m_self" is NULL, otherwise a
        BuiltInMethodProxy bound to the "m_self" object.
        '''
        method_def = self.field('m_ml') # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
576
577
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyCodeObject*, i.e. a <code> instance
    in the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Map the bytecode offset "addrq" to a source line number.

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as PyCode_Addr2Line does, rather than
        # from 0 as the pseudocode in lnotab_notes.txt has it:
        current_line = int_from_int(self.field('co_firstlineno'))

        # The table is a flat sequence of (address increment, line increment)
        # character pairs.
        current_addr = 0
        increments = zip(lnotab[::2], lnotab[1::2])
        for addr_step, line_step in increments:
            current_addr += ord(addr_step)
            if current_addr > addrq:
                break
            current_line += ord(line_step)
        return current_line
605
606
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyDictObject*, i.e. a dict instance
    in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Generate (PyObjectPtr key, PyObjectPtr value) pairs, analogous to
        dict.iteritems().

        Walks the slots of "ma_table" (ma_mask + 1 of them, capped by
        safe_range) and skips every slot whose "me_value" is NULL.
        '''
        for slot in safe_range(self.field('ma_mask') + 1):
            entry = self.field('ma_table') + slot
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Return a dict mapping key proxies to value proxies, or a placeholder
        if this dict was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            key_proxy = pyop_key.proxyval(visited)
            value_proxy = pyop_value.proxyval(visited)
            proxies[key_proxy] = value_proxy
        return proxies

    def write_repr(self, out, visited):
        '''
        Write "{key: value, ...}" to file-like object "out", or "{...}" if
        this dict was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
656
class PyInstanceObjectPtr(PyObjectPtr):
    '''
    Wrapper for a gdb.Value that's a PyInstanceObject*, i.e. an instance of
    an old-style class in the process being debugged.
    '''
    _typename = 'PyInstanceObject'

    def _class_name(self, visited):
        # Proxy of the "cl_name" field of this instance's "in_class"
        pyop_class = self.pyop_field('in_class')
        return pyop_class.pyop_field('cl_name').proxyval(visited)

    def proxyval(self, visited):
        '''
        Return an InstanceProxy holding the class name, the proxy of the
        "in_dict" attribute dictionary and the address, or a placeholder if
        this instance was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        # Get name of class:
        cl_name = self._class_name(visited)

        # Get dictionary of instance attributes:
        attr_proxy = self.pyop_field('in_dict').proxyval(visited)

        # Old-style class:
        return InstanceProxy(cl_name, attr_proxy, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<CLASSNAME(attr=value, ...) at remote 0xADDRESS>" to file-like
        object "out", or "<...>" if this instance was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        # Old-style class: get the class name, then the attribute dictionary
        cl_name = self._class_name(visited)
        pyop_in_dict = self.pyop_field('in_dict')

        _write_instance_repr(out, visited,
                             cl_name, pyop_in_dict, self.as_address())
694
class PyIntObjectPtr(PyObjectPtr):
    '''Wrapper for a gdb.Value that's a PyIntObject*'''
    _typename = 'PyIntObject'

    def proxyval(self, visited):
        '''Return the "ob_ival" field converted by int_from_int'''
        return int_from_int(self.field('ob_ival'))
701
class PyListObjectPtr(PyObjectPtr):
    '''Wrapper for a gdb.Value that's a PyListObject*'''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        '''Get the gdb.Value for the (PyObject*) at index "i" of ob_item'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''
        Return a list of the element proxies (at most safe_range elements),
        or a placeholder if this list was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(element.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        '''
        Write "[item, ...]" to file-like object "out", or "[...]" if this
        list was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
734
class PyLongObjectPtr(PyObjectPtr):
    '''Wrapper for a gdb.Value that's a PyLongObject*'''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Return the value as a long.

        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return long(0)

        ob_digit = self.field('ob_digit')

        # A two-byte "digit" type means 15-bit digits, otherwise 30-bit:
        if gdb.lookup_type('digit').sizeof == 2:
            SHIFT = 15
        else:
            SHIFT = 30

        magnitude = sum(long(ob_digit[i]) * 2**(SHIFT*i)
                        for i in safe_range(abs(ob_size)))
        if ob_size < 0:
            return -magnitude
        return magnitude
773
774
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyObject* pointing to the singleton
    (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy for None in the inferior process is simply None'''
        return None
784
785
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a python frame
    within the process being debugged.

    Unless the underlying value is optimized out, construction caches
    wrappers for the frame's code object and some of its fields (co,
    co_name, co_filename, f_lineno, f_lasti, co_nlocals, co_varnames).
    Those attributes are not set on an optimized-out frame.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        # cast_to defaults to None, as in the base class, so that this class
        # can also be constructed with just the gdb.Value
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Locals whose value pointer is NULL are skipped; nothing is yielded
        for an optimized-out frame.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame (empty if the frame is
        optimized out)
        '''
        if self.is_optimized_out():
            # Must be iterable: get_var_by_name loops over the result
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the builtin variables (empty if the frame is optimized out)
        '''
        if self.is_optimized_out():
            # Must be iterable: get_var_by_name loops over the result
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        Locals are searched first, then globals, then builtins.

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based), or None if the
        frame is optimized out

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        else:
            return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        The source file is opened and read from the filesystem of the gdb
        process, using the path given by filename().
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        with open(self.filename(), 'r') as f:
            all_lines = f.readlines()
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num()-1]

    def write_repr(self, out, visited):
        '''
        Write "Frame 0xADDR, for file FILE, line N, in NAME (local=value, ...)"
        to file-like object "out"
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        # NOTE(review): co_filename and co_name are wrapper objects here, so
        # "%s" relies on their str() -- presumably the string wrapper class
        # defines __str__; confirm against its definition.
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename,
                     self.current_line_num(),
                     self.co_name))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')
912
class PySetObjectPtr(PyObjectPtr):
    '''
    PyObjectPtr subclass for 'PySetObject' values.  Both representations
    below walk the 'table' field for mask+1 entries and tell frozenset from
    set by the object's tp_name.
    '''
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''
        Build a set (or a frozenset, if tp_name is 'frozenset') holding the
        proxy values of the members.

        Returns a ProxyAlreadyVisited instead if this object's address is
        already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            # Skip NULL slots and '<dummy key>' placeholder entries
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''
        Write "<tp_name>([member, member, ...])" to "out", or
        "<tp_name>(...)" if this object's address is already in "visited".
        '''
        out.write(self.safe_tp_name())

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write('([')
        first = True
        table = self.field('table')
        # Scratch copy used only for recognising '<dummy key>' entries.
        # proxyval() records the addresses it walks; recording them in the
        # real "visited" set would make the write_repr() call below treat a
        # container key (e.g. a tuple) as already visited and emit "(...)"
        # in place of its contents.
        probe_visited = set(visited)
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                key_proxy = pyop_key.proxyval(probe_visited)
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('])')
960
961
class PyStringObjectPtr(PyObjectPtr):
    '''
    PyObjectPtr subclass for 'PyStringObject' values; str() of an instance
    (and hence its proxy value) is built from the ob_sval buffer.
    '''
    _typename = 'PyStringObject'

    def __str__(self):
        '''Return a str made of the first ob_size characters of ob_sval'''
        length = self.field('ob_size')
        # View the buffer through an unsigned char pointer so that each
        # element can be handed to chr()
        byte_ptr = self.field('ob_sval').address.cast(_type_unsigned_char_ptr)
        chars = []
        for offset in safe_range(length):
            chars.append(chr(byte_ptr[offset]))
        return ''.join(chars)

    def proxyval(self, visited):
        '''Return the string's contents; "visited" is not used'''
        return str(self)
973
class PyTupleObjectPtr(PyObjectPtr):
    '''
    PyObjectPtr subclass for 'PyTupleObject' values: ob_size elements read
    from the ob_item field.
    '''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) at index i of ob_item'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''
        Return a tuple of the elements' proxy values, or a
        ProxyAlreadyVisited if this object's address is already in "visited".
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        items = []
        for i in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[i])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''
        Write the tuple to "out" in Python literal form, or "(...)" if this
        object's address is already in "visited".
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
        # A one-element tuple is closed with a trailing comma
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1009
class PyTypeObjectPtr(PyObjectPtr):
    # PyObjectPtr subclass for 'PyTypeObject' values.  It defines no methods
    # of its own, so all behavior is inherited from PyObjectPtr.
    # NOTE(review): _typename is presumably the C struct name the base class
    # uses to find the matching gdb type -- confirm against PyObjectPtr.
    _typename = 'PyTypeObject'
1012
1013
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        '''Like unichr(), but a code point of 0x10000 or above is returned
        as a two-character surrogate pair'''
        if x >= 0x10000:
            offset = x - 0x10000
            high = 0xD800 | (offset >> 10)
            low = 0xDC00 | (offset & 0x3FF)
            return unichr(high) + unichr(low)
        return unichr(x)
else:
    # unichr() in this build of Python covers every code point directly
    _unichr = unichr
1025
class PyUnicodeObjectPtr(PyObjectPtr):
    '''
    PyObjectPtr subclass for 'PyUnicodeObject' values; the proxy value is a
    unicode string built from the 'length' and 'str' fields.
    '''
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return the size of the Py_UNICODE type, as reported by gdb'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''
        Return a unicode string with the contents of the raw buffer.

        When Py_UNICODE is 2 bytes wide, surrogate pairs in the buffer are
        joined into single code points; an unpaired surrogate is kept as it
        is.  "visited" is not used.
        '''
        # From unicodeobject.h:
        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
        #     Py_UNICODE *str;    /* Raw Unicode buffer */
        field_length = long(self.field('length'))
        field_str = self.field('str')

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-2 or UCS-4 code points:
        if self.char_width() > 2:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Unpaired high surrogate: keep it rather than dropping
                    # it from the result.  i has not moved past ucs2, so the
                    # next iteration handles that unit in its own right.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result
1071
1072
def int_from_int(gdbval):
    '''Convert gdbval to a Python int by parsing its str() form'''
    text = str(gdbval)
    return int(text)
1075
1076
def stringify(val, pretty=False):
    '''
    Return a string representation of val.

    By default this is repr(val), which puts everything on one line.  With
    pretty=True, pprint.pformat is used instead; that can be nicer to read
    but can lead to very long results.  This function isolates the choice.
    '''
    if pretty:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1085
1086
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__ (self, gdbval):
        # The value to be printed; it is only wrapped when to_string is called
        self.gdbval = gdbval

    def to_string (self):
        '''Return the truncated repr (at most MAX_OUTPUT_LEN) of the value'''
        # Building the full proxy value and stringifying it would also work,
        # but doing so could be expensive, so the truncating repr is used.
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1102
def pretty_printer_lookup(gdbval):
    '''
    Return a PyObjectPtrPrinter for gdbval if its unqualified type is a
    pointer to "PyObject" or "PyFrameObject"; otherwise return None.
    '''
    val_type = gdbval.type.unqualified()
    if val_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_name = str(val_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1110
1111"""
1112During development, I've been manually invoking the code in this way:
1113(gdb) python
1114
1115import sys
1116sys.path.append('/home/david/coding/python-gdb')
1117import libpython
1118end
1119
1120then reloading it after each edit like this:
1121(gdb) python reload(libpython)
1122
The following code should ensure that the pretty-printer is registered
if the code is autoloaded by gdb when visiting libpython.so, provided
1125that this python file is installed to the same path as the library (or its
1126.debug file) plus a "-gdb.py" suffix, e.g:
1127  /usr/lib/libpython2.6.so.1.0-gdb.py
1128  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1129"""
def register (obj):
    '''
    Append pretty_printer_lookup to obj.pretty_printers.

    If obj is None, the gdb module itself is used in its place.
    '''
    # Identity test: None is a singleton, and "==" could be overridden by
    # whatever object is passed in
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1138
1139
1140
1141# Unfortunately, the exact API exposed by the gdb module varies somewhat
1142# from build to build
1143# See http://bugs.python.org/issue8279?#msg102276
1144
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame wrapping the next older gdb frame, or None if
        there is none'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame wrapping the next newer gdb frame, or None if
        there is none'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking gdb for each
        # neighbour exactly once and without wrapping the intermediate frames:
        gdbframe = self._gdbframe.newer()
        while gdbframe:
            index += 1
            gdbframe = gdbframe.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() == 'PyEval_EvalFrameEx':
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a PyEval_EvalFrameEx frame:
                return True

        return False

    def get_pyop(self):
        '''Return a proxy for the variable "f" of this gdb frame, built with
        PyFrameObjectPtr.from_pyobject_ptr, or None if reading it raises
        ValueError'''
        try:
            f = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(f)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None if gdb
        reports no selected frame'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        frame = cls.get_selected_frame()

        # Walk outwards from the selected frame to the first
        # PyEval_EvalFrameEx frame:
        while frame:
            if frame.is_evalframeex():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write "#<index> <details>" for this frame to sys.stdout; for a
        PyEval_EvalFrameEx frame whose python frame can be read, the details
        are its truncated repr, followed by the current source line'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
                sys.stdout.write(pyop.current_line())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            sys.stdout.write('#%i\n' % self.get_index())
1243
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # Writes the requested lines of the selected python frame's source
        # file to sys.stdout, marking the current line with ">".  Problems
        # (no python frame, unreadable frame or unreadable file) are
        # reported as a message rather than as a traceback.
        import re

        start = None
        end = None

        m = re.match(r'\s*(\d+)\s*', args)
        if m:
            # group(1) is just the digits; group(0) would also include the
            # surrounding whitespace
            start = int(m.group(1))
            end = start + 10

        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if m:
            start, end = map(int, m.groups())

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            # current_line_num() gives None when the frame information has
            # been optimized out; there is then no line to centre on or mark
            print('Unable to read information on python frame')
            return

        if start is None:
            start = lineno - 5
            end = lineno + 5

        if start<1:
            start = 1

        try:
            f = open(filename, 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n' % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for i, line in enumerate(all_lines[start-1:end]):
                linestr = str(i+start)
                # Highlight current line:
                if i + start == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1313
def move_in_stack(move_up):
    '''
    Select and print the nearest python frame above or below the selected
    one (for the py-up/py-down command).

    move_up: true to search the older frames, false to search the newer
    ones.  If no such python frame exists a message is printed instead.
    '''
    frame = Frame.get_selected_python_frame()
    while frame:
        neighbour = frame.older() if move_up else frame.newer()
        if not neighbour:
            break

        if neighbour.is_evalframeex():
            # Found it; the summary is only printed if gdb could actually
            # select the frame
            if neighbour.select():
                neighbour.print_summary()
            return

        frame = neighbour

    print('Unable to find an older python frame' if move_up
          else 'Unable to find a newer python frame')
1338
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'
    def __init__(self):
        # Sets the command up under the name "py-up"
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Neither args nor from_tty is used
        move_in_stack(move_up=True)
1350
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'
    def __init__(self):
        # Sets the command up under the name "py-down"
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Neither args nor from_tty is used
        move_in_stack(move_up=False)

# py-up and py-down are only instantiated when this build of gdb has
# gdb.Frame.select; not all builds do
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1367
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        # Sets the command up under the name "py-bt"
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Start at the selected python frame and walk towards the oldest
        # frame, summarizing only the PyEval_EvalFrameEx frames on the way
        current = Frame.get_selected_python_frame()
        while current:
            is_python = current.is_evalframeex()
            if is_python:
                current.print_summary()
            current = current.older()

PyBacktrace()
1385
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'
    def __init__(self):
        # Sets the command up under the name "py-print"
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        rendered = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, rendered))

PyPrint()
1419
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame (if any)'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # Prints one "name = value" line per local variable of the selected
        # python frame; args and from_tty are not used.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()
1448