1#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types.  Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython.  In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations, e.g. a string, a list, a dict, or a
frame giving file/line information and the state of local variables.
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process.  For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
22holding three PyObject* that turn out to be PyBytesObject* instances, we can
23generate a proxy value within the gdb process that is a list of bytes
24instances:
25  [b"foo", b"bar", b"baz"]
26
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object.  This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger.
40
41The module also extends gdb with some python-specific commands.
42'''
43
44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax
45# compatible (2.6+ and 3.0+).  See #19308.
46
47from __future__ import print_function
48import gdb
49import os
50import locale
51import sys
52
# When running under Python 3, bind the Python 2 spellings used throughout
# this module (unichr, xrange, long) to their Python 3 equivalents.
if sys.version_info[0] >= 3:
    unichr = chr
    xrange = range
    long = int
57
# Look up the gdb.Type for some standard types.
# These are looked up afresh on every call rather than cached, because types
# (e.g. pointer sizes) may change when gdb loads a different executable.
61
def _type_char_ptr():
    '''Return the gdb.Type for (char*), looked up afresh on each call.'''
    char_type = gdb.lookup_type('char')
    return char_type.pointer()  # char*
64
65
def _type_unsigned_char_ptr():
    '''Return the gdb.Type for (unsigned char*), looked up on each call.'''
    uchar_type = gdb.lookup_type('unsigned char')
    return uchar_type.pointer()  # unsigned char*
68
69
def _type_unsigned_short_ptr():
    '''Return the gdb.Type for (unsigned short*), looked up on each call.'''
    ushort_type = gdb.lookup_type('unsigned short')
    return ushort_type.pointer()  # unsigned short*
72
73
def _type_unsigned_int_ptr():
    '''Return the gdb.Type for (unsigned int*), looked up on each call.'''
    uint_type = gdb.lookup_type('unsigned int')
    return uint_type.pointer()  # unsigned int*
76
77
def _sizeof_void_p():
    '''Return the "sizeof" that gdb reports for the (void*) type.'''
    void_ptr_type = gdb.lookup_type('void').pointer()  # void*
    return void_ptr_type.sizeof
80
81
# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Bit masks tested against a type object's tp_flags field by
# PyObjectPtr.subclass_from_type() in order to pick the wrapper class for a
# value (the TYPE_SUBCLASS test is currently commented out there).
Py_TPFLAGS_HEAPTYPE = (1 << 9)
Py_TPFLAGS_LONG_SUBCLASS     = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1 << 26)
Py_TPFLAGS_BYTES_SUBCLASS    = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1 << 31)


# NOTE(review): presumably the default cap passed to get_truncated_repr();
# its users are not visible in this part of the file -- confirm
MAX_OUTPUT_LEN=1024

hexdigits = "0123456789abcdef"

# Encoding used by the Python 2 flavour of write_unicode() when it has to
# turn unicode text into bytes
ENCODING = locale.getpreferredencoding()

# NOTE(review): presumably the name of the C function whose gdb frames
# correspond to Python frame evaluation -- confirm against its users
EVALFRAME = '_PyEval_EvalFrameDefault'
103
class NullPyObjectPtr(RuntimeError):
    '''Raised by PyObjectPtr.field() when the wrapped pointer is NULL.'''
106
107
def safety_limit(val):
    '''
    Clamp "val" (an integer read from the process being debugged) to at most
    1000, so that arbitrary breakage within said process cannot make gdb
    iterate over absurdly large sizes (e.g. sizes of lists).
    '''
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
113
114
def safe_range(val):
    '''
    Like range(val), except that the upper bound is first converted to an int
    and capped by safety_limit(), in case the data it came from was corrupted.
    '''
    upper_bound = safety_limit(int(val))
    return xrange(upper_bound)
119
if sys.version_info[0] < 3:
    def write_unicode(file, text):
        # Python 2: write a byte or unicode string to file.  Unicode strings
        # are first encoded to ENCODING using the 'backslashreplace' error
        # handler, so that no UnicodeEncodeError can be raised.
        if isinstance(text, unicode):
            file.write(text.encode(ENCODING, 'backslashreplace'))
        else:
            file.write(text)
else:
    def write_unicode(file, text):
        # Python 3: the text is passed straight through to file.write()
        file.write(text)
131
# Use os.fsencode where the running Python provides it; otherwise fall back
# to an equivalent written for Python 2.
os_fsencode = getattr(os, 'fsencode', None)
if os_fsencode is None:
    def os_fsencode(filename):
        # Byte strings are returned untouched
        if not isinstance(filename, unicode):
            return filename
        fs_encoding = sys.getfilesystemencoding()
        if fs_encoding == 'mbcs':
            # mbcs doesn't support surrogateescape
            return filename.encode(fs_encoding)
        pieces = []
        for ch in filename:
            codepoint = ord(ch)
            if 0xDC80 <= codepoint <= 0xDCFF:
                # surrogateescape error handler: map the lone surrogate back
                # to the single byte it stands for
                pieces.append(chr(codepoint - 0xDC00))
            else:
                pieces.append(ch.encode(fs_encoding))
        return ''.join(pieces)
151
class StringTruncated(RuntimeError):
    '''Raised by TruncatedStringIO.write() once its length limit is hit.'''
154
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer in the spirit of io.StringIO which, when
    given a "maxlen", stops accepting data beyond that length by raising
    StringTruncated.
    '''
    def __init__(self, maxlen=None):
        # A false maxlen (None or 0) means "no limit"
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''
        Append "data" to the buffer.  If doing so would exceed maxlen, only
        the part that still fits is appended and StringTruncated is raised.
        '''
        limit = self.maxlen
        if limit:
            room = limit - len(self._val)
            if len(data) > room:
                # Truncation: keep what fits, then abort the caller
                self._val += data[0:room]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything accumulated so far.'''
        return self._val
173
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct this wrapper corresponds to; get_gdb_type() looks
    # it up and returns the pointer-to-struct type.
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        Wrap the gdb.Value "gdbval".  If "cast_to" (a gdb.Type) is given, the
        value is cast to that type first.
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        # "ob_type" is read through a (PyObject*) view of the pointer
        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # "ob_size" is read through a (PyVarObject*) view of the pointer
        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            # Start the traversal with an empty set of visited addresses
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's "ob_type" field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Return True if the wrapped pointer converts to the integer 0.'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the "tp_name" of this object's type as a string, or 'unknown'
        if it cannot be read or decoded.
        '''
        try:
            ob_type = self.type()
            tp_name = ob_type.field('tp_name')
            return tp_name.string()
        # NullPyObjectPtr: NULL tp_name?
        # RuntimeError: Can't even read the object at all?
        # UnicodeDecodeError: Failed to decode tp_name bytestring
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave

        This base implementation ignores "visited" and returns an object
        whose repr() shows the type name and remote address.
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags

        Returns "cls" itself when nothing more specific matches, or when the
        type object cannot be read.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        # RuntimeError: NULL pointers
        # UnicodeDecodeError: string() fails to decode the bytestring
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        #print('tp_flags = 0x%08x' % tp_flags)
        #print('tp_name = %r' % tp_name)

        # Special cases recognised by type name; these take priority over
        # the tp_flags tests below
        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    'method-wrapper': wrapperobject,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # The heap-type test comes before the *_SUBCLASS tests, so an
        # instance of a heap type that also has one of those flags set is
        # wrapped as HeapTypeObjectPtr
        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
        #    return PyTypeObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        # NOTE(review): "cls" may already have been rebound to the detected
        # subclass above if the failure happened after subclass_from_type()
        # returned; in that case the uncast wrapper built here is of that
        # subclass rather than of the class this method was called on.
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type for a pointer to the struct cls._typename.'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Return the wrapped pointer converted to an integer.'''
        return long(self._gdbval)
420
class PyVarObjectPtr(PyObjectPtr):
    # Wrapper whose gdb type is (PyVarObject*); PyObjectPtr.field() casts to
    # this type in order to read "ob_size".
    _typename = 'PyVarObject'
423
class ProxyAlreadyVisited(object):
    '''
    Placeholder proxy to use when protecting against infinite recursion due to
    loops in the object graph.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''
    def __init__(self, rep):
        # rep: the exact text that repr() of this placeholder should return,
        # e.g. '[...]' or '{...}'
        self._rep = rep

    def __repr__(self):
        # Returned verbatim, without any quoting
        return self._rep
436
437
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''
    Shared code for use by all classes: write a representation of the form
        <NAME(ATTR=VALUE, ...) at remote 0xADDRESS>
    to file-like object "out".

    The parenthesised attribute list is only written when "pyop_attrdict" is
    a PyDictObjectPtr; anything else (e.g. None) is skipped.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        attr_items = pyop_attrdict.iteritems()
        for index, (pyop_key, pyop_value) in enumerate(attr_items):
            if index > 0:
                out.write(', ')
            out.write(pyop_key.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
457
458
class InstanceProxy(object):
    '''
    Proxy for an instance in the inferior process: holds its class name, a
    proxy of its attribute dictionary and its remote address, and has a repr()
    of the form <NAME(ATTR=VALUE, ...) at remote 0xADDRESS>.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if isinstance(self.attrdict, dict):
            # Use items() rather than iteritems(): this file must also run
            # under Python 3, where dict.iteritems() does not exist.
            kwargs = ', '.join(["%s=%r" % (arg, val)
                                for arg, val in self.attrdict.items()])
            return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                                kwargs, self.address)
        else:
            # No usable attribute dict (e.g. a placeholder proxy): omit it
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)
475
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the type wrapped by
    "typeobj", rounded up to a multiple of sizeof(void*), and return it cast
    to size_t.

    The gdb.Type for size_t is looked up on first use and then cached as an
    attribute of this function.
    '''
    size_t_type = _PyObject_VAR_SIZE._type_size_t
    if size_t_type is None:
        size_t_type = gdb.lookup_type('size_t')
        _PyObject_VAR_SIZE._type_size_t = size_t_type

    # Adding the mask and then clearing its bits rounds up to the alignment
    align_mask = _sizeof_void_p() - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    return (unaligned & ~align_mask).cast(size_t_type)
_PyObject_VAR_SIZE._type_size_t = None
486
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Wrapper chosen by PyObjectPtr.subclass_from_type() for objects whose type
    has Py_TPFLAGS_HEAPTYPE set.  Such objects are displayed together with
    their attribute dictionary, when one can be located.
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)

        The dictionary pointer is found via the type's tp_dictoffset: zero
        means there is none; a negative value is taken relative to the end of
        the object, whose size is computed with _PyObject_VAR_SIZE.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # Sanity-check the computed offset.  This is data read
                    # from a possibly corrupt inferior, so check explicitly
                    # and fail safe rather than using "assert": an
                    # AssertionError would escape the handler below, and
                    # asserts are stripped entirely under "python -O".
                    if not (dictoffset > 0 and
                            dictoffset % _sizeof_void_p() == 0):
                        return None

                # Step "dictoffset" bytes into the object and read the
                # (PyObject*) stored there
                dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited placeholder if this
        object's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write <NAME(ATTR=VALUE, ...) at remote 0xADDRESS> to "out", or '<...>'
        if this object's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
552
class ProxyException(Exception):
    '''
    Proxy for an exception instance in the inferior process; its repr() is
    the remote type name immediately followed by the repr of its args.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        args_text = repr(self.args)
        return '%s%s' % (self.tp_name, args_text)
560
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Return a ProxyException built from the remote type name and a proxy of
        the "args" field, or a '(...)' placeholder if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        args_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), args_proxy)

    def write_repr(self, out, visited):
        '''
        Write the remote type name followed by the repr of the "args" field
        to "out", or '(...)' if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
586
class PyClassObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
    instance within the process being debugged.

    PyObjectPtr.subclass_from_type() selects this class for objects whose
    tp_name is 'classobj'.
    """
    _typename = 'PyClassObject'
593
594
class BuiltInFunctionProxy(object):
    # Proxy for a built-in function in the inferior process; created by
    # PyCFunctionObjectPtr.proxyval() when the object's m_self is NULL.
    def __init__(self, ml_name):
        # ml_name: the function's name, as read from its PyMethodDef
        self.ml_name = ml_name

    def __repr__(self):
        return "<built-in function %s>" % self.ml_name
601
class BuiltInMethodProxy(object):
    '''
    Proxy for a built-in method bound to an object in the inferior process.
    "pyop_m_self" is the PyObjectPtr for the object the method is bound to.
    '''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        details = (self.ml_name, owner.safe_tp_name(), owner.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
613
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInMethodProxy when the function is bound to an object
        (non-NULL m_self), otherwise a BuiltInFunctionProxy.
        '''
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        try:
            ml_name = method_def['ml_name'].string()
        except UnicodeDecodeError:
            # Undecodable name: show a marker instead of failing
            ml_name = '<ml_name:UnicodeDecodeError>'

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
633
634
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        # The table alternates (address increment, line increment) entries
        addr_deltas = lnotab[::2]
        line_deltas = lnotab[1::2]

        addr = 0
        for addr_delta, line_delta in zip(addr_deltas, line_deltas):
            addr += ord(addr_delta)
            if addr > addrq:
                # Gone past the requested offset: lineno is the answer
                break
            lineno += ord(line_delta)
        return lineno
662
663
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Entries whose value pointer is NULL are skipped.  The number of
        entries examined is capped by safe_range().
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        entries, nentries = self._get_entries(keys)
        for i in safe_range(nentries):
            ep = entries[i]
            if long(values):
                # Non-NULL ma_values: the values live in that separate array
                pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
            else:
                # Otherwise each entry carries its own value
                pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if not pyop_value.is_null():
                pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Return a dict mapping key proxies to value proxies, or a '{...}'
        placeholder if this dict's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(self.as_address())

        result = {}
        for pyop_key, pyop_value in self.iteritems():
            proxy_key = pyop_key.proxyval(visited)
            proxy_value = pyop_value.proxyval(visited)
            result[proxy_key] = proxy_value
        return result

    def write_repr(self, out, visited):
        '''
        Write a {key: value, ...} representation to "out", or '{...}' if this
        dict's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('{...}')
            return
        visited.add(self.as_address())

        out.write('{')
        first = True
        for pyop_key, pyop_value in self.iteritems():
            if not first:
                out.write(', ')
            first = False
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')

    def _get_entries(self, keys):
        '''
        Given the gdb.Value for the dict's "ma_keys", return a pair
        (entries, nentries): something indexable yielding the key entries,
        and the number of entries to examine.
        '''
        dk_size = int(keys['dk_size'])
        try:
            # <= Python 3.5
            return keys['dk_entries'], dk_size
        except RuntimeError:
            # >= Python 3.6
            pass

        # Only the newer layout needs dk_nentries, so it is read after the
        # probe above: reading it first would raise on a keys object that
        # lacks the field and make the older-layout return unreachable.
        dk_nentries = int(keys['dk_nentries'])

        # The entries follow the dk_indices table; the byte size of each
        # index in that table depends on dk_size.
        if dk_size <= 0xFF:
            offset = dk_size
        elif dk_size <= 0xFFFF:
            offset = 2 * dk_size
        elif dk_size <= 0xFFFFFFFF:
            offset = 4 * dk_size
        else:
            offset = 8 * dk_size

        # Skip "offset" bytes past the start of dk_indices, and treat what
        # follows as an array of PyDictKeyEntry
        ent_addr = keys['dk_indices'].address
        ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
        ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
        ent_addr = ent_addr.cast(ent_ptr_t)

        return ent_addr, dk_nentries
745
746
class PyListObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    '''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''
        Return a list of proxies for the elements, or a '[...]' placeholder
        if this list's address is already in "visited".
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(length):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(pyop_item.proxyval(visited))
        return items

    def write_repr(self, out, visited):
        '''
        Write a [elem, ...] representation to "out", or '[...]' if this
        list's address is already in "visited".
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            pyop_item.write_repr(out, visited)
        out.write(']')
779
class PyLongObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyLongObject* i.e. an arbitrary
    precision integer within the process being debugged.
    '''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15

        The number of digits read is capped by safe_range().
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return 0

        ob_digit = self.field('ob_digit')

        # A 2-byte "digit" type goes with 15-bit digits, else 30-bit digits
        shift = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = 0
        for index in safe_range(abs(ob_size)):
            magnitude += long(ob_digit[index]) << (shift * index)

        if ob_size < 0:
            return -magnitude
        return magnitude

    def write_repr(self, out, visited):
        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
        out.write("%s" % self.proxyval(visited))
823
824
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    def proxyval(self, visited):
        # The underlying integer value decides between True and False
        return bool(PyLongObjectPtr.proxyval(self, visited))
835
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        # The remote None is represented by this process's own None;
        # "visited" is not consulted.
        return None
845
846
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* within the process
    being debugged.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        # The attributes below are only populated when the frame is readable;
        # the other methods check is_optimized_out() before using them.
        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Slots of f_localsplus holding a NULL pointer are skipped.  Nothing is
        yielded if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame

        An empty tuple is returned if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables

        An empty tuple is returned if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        The scopes are searched in that order and the first match wins.

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string

        A placeholder message is returned if the frame is optimized out.'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out or if the line number
        cannot be computed.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno

        try:
            return self.co.addr2line(self.f_lasti)
        except Exception:
            # bpo-34989: addr2line() is a complex function, it can fail in many
            # ways. For example, it fails with a TypeError on "FakeRepr" if
            # gdb fails to load debug symbols. Use a catch-all "except
            # Exception" to make the whole function safe. The caller has to
            # handle None anyway for optimized Python.
            return None

    def current_line(self):
        '''Get the text of the current source line as a string, as read from
        the source file (normally with a trailing newline character)

        Returns a placeholder message if the frame is optimized out or its
        line number cannot be determined, and None if the source file cannot
        be read or does not contain that line.'''
        if self.is_optimized_out():
            return '(frame information optimized out)'

        lineno = self.current_line_num()
        if lineno is None:
            return '(failed to get frame line number)'

        # Line numbers are 1-based.  Reject anything smaller explicitly: once
        # converted to a list offset it would become a negative index and
        # silently select a line counted from the end of the file.
        if lineno < 1:
            return None

        filename = self.filename()
        try:
            with open(os_fsencode(filename), 'r') as fp:
                lines = fp.readlines()
        except (IOError, UnicodeDecodeError):
            # Missing/unreadable file, or a file that cannot be decoded:
            # this is best-effort, so report "no line available".
            return None

        try:
            # Convert from 1-based current_line_num to 0-based list offset
            return lines[lineno - 1]
        except IndexError:
            return None

    def write_repr(self, out, visited):
        '''Write a one-line description of the frame to "out": its address,
        file name, line number ("?" if unknown), function name and the
        name=repr pairs of its local variables.'''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        lineno = self.current_line_num()
        lineno = str(lineno) if lineno is not None else "?"
        out.write('Frame 0x%x, for file %s, line %s, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     lineno,
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''Write a traceback-style 'File "...", line N, in name' entry for
        this frame to sys.stdout ("?" is used if the line is unknown).'''
        if self.is_optimized_out():
            sys.stdout.write('  (frame information optimized out)\n')
            return
        visited = set()
        lineno = self.current_line_num()
        lineno = str(lineno) if lineno is not None else "?"
        sys.stdout.write('  File "%s", line %s, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     lineno,
                     self.co_name.proxyval(visited)))
1005
class PySetObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PySetObject* within the process
    being debugged.
    """
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        """Return the value of the inferior's '_PySet_Dummy' global symbol."""
        symbol = gdb.lookup_global_symbol('_PySet_Dummy')
        return symbol.value()

    def __iter__(self):
        """Yield a PyObjectPtr for each entry of the 'table' field whose key
        is neither NULL nor the dummy key."""
        dummy = self._dummy_key()
        entries = self.field('table')
        entry_count = self.field('mask') + 1
        for index in safe_range(entry_count):
            key = entries[index]['key']
            if key != 0 and key != dummy:
                yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        """Return a set (or a frozenset, when the type name is 'frozenset')
        holding the proxy values of the members."""
        address = self.as_address()

        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        if self.safe_tp_name() == 'frozenset':
            factory = frozenset
        else:
            factory = set
        return factory(member.proxyval(visited) for member in self)

    def write_repr(self, out, visited):
        """Write the set to "out", emulating Python 3's set_repr."""
        type_name = self.safe_tp_name()
        address = self.as_address()

        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(type_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals; any other type name is written
        # as a call wrapped around the braces:
        wrapped = type_name != 'set'
        if wrapped:
            out.write(type_name)
            out.write('(')

        out.write('{')
        for position, member in enumerate(self):
            if position:
                out.write(', ')
            member.write_repr(out, visited)
        out.write('}')

        if wrapped:
            out.write(')')
1066
1067
class PyBytesObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBytesObject* within the process
    being debugged.
    """
    _typename = 'PyBytesObject'

    def __str__(self):
        """Return a str with one character per byte of ob_sval, using
        ob_size as the length."""
        length = self.field('ob_size')
        payload = self.field('ob_sval')
        data = payload.address.cast(_type_unsigned_char_ptr())
        return ''.join(chr(data[index]) for index in safe_range(length))

    def proxyval(self, visited):
        """Return the str built by __str__()."""
        return str(self)

    def write_repr(self, out, visited):
        """Write the value to "out" as a Python 3 bytes literal, i.e. with a
        "b" prefix."""
        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.  Double quotes are only chosen when the data
        # contains a single quote and no double quote:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        # Whitespace characters that have a short escape sequence:
        short_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte == quote or byte == '\\':
                out.write('\\')
                out.write(byte)
            elif byte in short_escapes:
                out.write(short_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code & 0xf0) >> 4])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1110
class PyTupleObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTupleObject* within the process
    being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        """Return the gdb.Value for the (PyObject*) with the given index."""
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        """Return a tuple holding the proxy values of the items."""
        address = self.as_address()

        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        size = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(size):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        """Write the tuple to "out"; a one-item tuple gets a trailing
        comma."""
        address = self.as_address()

        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        size = int_from_int(self.field('ob_size'))
        out.write('(')
        for index in safe_range(size):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(',)' if size == 1 else ')')
1146
class PyTypeObjectPtr(PyObjectPtr):
    # Wrapper for a gdb.Value that is a PyTypeObject*.  Everything is
    # inherited from PyObjectPtr; only the struct name differs.
    _typename = 'PyTypeObject'
1149
1150
1151def _unichr_is_printable(char):
1152    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1153    if char == u" ":
1154        return True
1155    import unicodedata
1156    return unicodedata.category(char) not in ("C", "Z")
1157
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        # Code points that fit in 16 bits map to a single character:
        if x < 0x10000:
            return unichr(x)
        # Anything above is split into a pair of surrogates:
        offset = x - 0x10000
        high = unichr(0xD800 | (offset >> 10))
        low = unichr(0xDC00 | (offset & 0x3FF))
        return high + low
else:
    _unichr = unichr
1169
1170
class PyUnicodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyUnicodeObject* within the process
    being debugged.
    """
    _typename = 'PyUnicodeObject'

    def char_width(self):
        """Return the size of the inferior's Py_UNICODE type."""
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        """Return a unicode string within the gdb process holding the
        characters of this string.

        Two layouts of PyUnicodeObject are handled, told apart by the
        presence of a 'data' field (the answer is cached in the module-level
        _is_pep393).  Code points above 0x10ffff are replaced by U+FFFD.
        """
        global _is_pep393
        if _is_pep393 is None:
            fields = gdb.lookup_type('PyUnicodeObject').fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                # Locate the character data: compact strings store it right
                # after the corresponding header structure.
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                # 'kind' is used as the width, in bytes, of one character:
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr())
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr())
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr())
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # NOTE(review): the unpaired high surrogate in "ucs" is
                    # dropped here rather than appended - confirm intended.
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        # The replacement character needs the u prefix: without it a Python 2
        # gdb would insert the six characters "\ufffd" instead of U+FFFD.
        result = u''.join([
            (_unichr(ucs) if ucs <= 0x10ffff else u'\ufffd')
            for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        """Write the string to "out" as a Python 3 str literal, i.e. without
        a "u" prefix.

        Quotes, backslashes, control characters and non-printable or
        non-encodable characters are written as escape sequences.
        """
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            #  Map special whitespace to '\t', \n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  DEL must be tested by
            # code point: comparing the character itself with the int 0x7F
            # is never true.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # A character that cannot be encoded with ENCODING is
                    # treated as non-printable and escaped as well:
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        out.write(hexdigits[(code >> 28) & 0x0000000F])
                        out.write(hexdigits[(code >> 24) & 0x0000000F])
                        out.write(hexdigits[(code >> 20) & 0x0000000F])
                        out.write(hexdigits[(code >> 16) & 0x0000000F])
                        out.write(hexdigits[(code >> 12) & 0x0000000F])
                        out.write(hexdigits[(code >> 8) & 0x0000000F])
                        out.write(hexdigits[(code >> 4) & 0x0000000F])
                        out.write(hexdigits[code & 0x0000000F])
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        out.write(hexdigits[(code >> 12) & 0x000F])
                        out.write(hexdigits[(code >> 8) & 0x000F])
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1357
1358
class wrapperobject(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a wrapperobject* within the process
    being debugged.
    """
    _typename = 'wrapperobject'

    def safe_name(self):
        """Return the repr of the descriptor's name, or a placeholder string
        if it cannot be read."""
        try:
            descr = self.field('descr')
            return repr(descr['d_base']['name'].string())
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown name>'

    def safe_tp_name(self):
        """Return the type name of the 'self' field, or a placeholder string
        if it cannot be read."""
        try:
            bound_self = self.field('self')
            return bound_self['ob_type']['tp_name'].string()
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown tp_name>'

    def safe_self_addresss(self):
        """Return the 'self' field formatted as a hexadecimal address, or a
        placeholder string if it cannot be read."""
        try:
            return '%#x' % long(self.field('self'))
        except (NullPyObjectPtr, RuntimeError):
            return '<failed to get self address>'

    def proxyval(self, visited):
        """Return a "<method-wrapper ...>" description string."""
        details = (self.safe_name(),
                   self.safe_tp_name(),
                   self.safe_self_addresss())
        return "<method-wrapper %s of %s object at %s>" % details

    def write_repr(self, out, visited):
        """Write the string returned by proxyval() to "out"."""
        out.write(self.proxyval(visited))
1392
1393
def int_from_int(gdbval):
    """Convert "gdbval" to an int by parsing its str() form."""
    text = str(gdbval)
    return int(text)
1396
1397
def stringify(val):
    """Return the string form of "val" used when printing proxy values."""
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_repr = True
    if not use_repr:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1406
1407
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # The gdb.Value to be printed
        self.gdbval = gdbval

    def to_string(self):
        """Return the truncated repr of the wrapped value."""
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        use_truncated_repr = True
        if not use_truncated_repr:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            return stringify(pyop.proxyval(set()))
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1423
def pretty_printer_lookup(gdbval):
    """Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to one of
    the handled struct types, otherwise None."""
    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        return None

    # Look at the name of the pointed-to type, ignoring qualifiers:
    target_name = str(value_type.target().unqualified())
    handled = ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject")
    if target_name in handled:
        return PyObjectPtrPrinter(gdbval)
    return None
1433
1434"""
1435During development, I've been manually invoking the code in this way:
1436(gdb) python
1437
1438import sys
1439sys.path.append('/home/david/coding/python-gdb')
1440import libpython
1441end
1442
1443then reloading it after each edit like this:
1444(gdb) python reload(libpython)
1445
1446The following code should ensure that the prettyprinter is registered
1447if the code is autoloaded by gdb when visiting libpython.so, provided
1448that this python file is installed to the same path as the library (or its
1449.debug file) plus a "-gdb.py" suffix, e.g:
1450  /usr/lib/libpython2.6.so.1.0-gdb.py
1451  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1452"""
def register(obj):
    """Append pretty_printer_lookup to the pretty_printers list of "obj",
    or of the gdb module itself when "obj" is None."""
    target = gdb if obj is None else obj

    # Wire up the pretty-printer
    target.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1461
1462
1463
1464# Unfortunately, the exact API exposed by the gdb module varies somewhat
1465# from build to build
1466# See http://bugs.python.org/issue8279?#msg102276
1467
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame wrapping the next older gdb frame, or None if there
        is none'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame wrapping the next newer gdb frame, or None if there
        is none'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking for each newer
        # frame only once per step:
        iter_frame = self.newer()
        while iter_frame:
            index += 1
            iter_frame = iter_frame.newer()
        return index

    # We divide frames into:
    #   - "python frames":
    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
    #       - "other python frames": things that are of interest from a python
    #         POV, but aren't bytecode (e.g. GC, GIL)
    #   - everything else

    def is_python_frame(self):
        '''Is this a _PyEval_EvalFrameDefault frame, or some other important
        frame? (see is_other_python_frame for what "important" means in this
        context)'''
        if self.is_evalframe():
            return True
        if self.is_other_python_frame():
            return True
        return False

    def is_evalframe(self):
        '''Is this a _PyEval_EvalFrameDefault frame?'''
        if self._gdbframe.name() == EVALFRAME:
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a _PyEval_EvalFrameDefault frame:
                return True

        return False

    def is_other_python_frame(self):
        '''Is this frame worth displaying in python backtraces?
        Examples:
          - waiting on the GIL
          - garbage-collecting
          - within a CFunction
         If it is, return a descriptive string
         For other frames, return False
         '''
        if self.is_waiting_for_gil():
            return 'Waiting for the GIL'

        if self.is_gc_collect():
            return 'Garbage-collecting'

        # Detect invocations of PyCFunction instances:
        frame = self._gdbframe
        caller = frame.name()
        if not caller:
            return False

        if caller in ('_PyCFunction_FastCallDict',
                      '_PyCFunction_FastCallKeywords'):
            arg_name = 'func'
            # Within that frame:
            #   "func" is the local containing the PyObject* of the
            # PyCFunctionObject instance
            #   "f" is the same value, but cast to (PyCFunctionObject*)
            #   "self" is the (PyObject*) of the 'self'
            try:
                # Use the prettyprinter for the func:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('PyCFunction invocation (unable to read %s: '
                        'missing debuginfos?)' % arg_name)
            except RuntimeError:
                return 'PyCFunction invocation (unable to read %s)' % arg_name

        if caller == 'wrapper_call':
            arg_name = 'wp'
            try:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('<wrapper_call invocation (unable to read %s: '
                        'missing debuginfos?)>' % arg_name)
            except RuntimeError:
                return '<wrapper_call invocation (unable to read %s)>' % arg_name

        # This frame isn't worth reporting:
        return False

    def is_waiting_for_gil(self):
        '''Is this frame waiting on the GIL?

        Always returns a bool; a frame without a name is not regarded as
        waiting.'''
        # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
        name = self._gdbframe.name()
        return bool(name and 'pthread_cond_timedwait' in name)

    def is_gc_collect(self):
        '''Is this frame "collect" within the garbage-collector?'''
        return self._gdbframe.name() == 'collect'

    def get_pyop(self):
        '''Return the PyFrameObjectPtr for the "f" variable of this gdb frame

        If "f" is optimized out, the "f" variable of the calling gdb frame is
        tried instead; if that does not help either, the optimized-out
        wrapper is returned.  Returns None if reading a variable raises
        ValueError.'''
        try:
            f = self._gdbframe.read_var('f')
            frame = PyFrameObjectPtr.from_pyobject_ptr(f)
            if not frame.is_optimized_out():
                return frame
            # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
            # because it was "optimized out". Try to get "f" from the frame
            # of the caller, PyEval_EvalCodeEx().
            orig_frame = frame
            caller = self._gdbframe.older()
            if caller:
                # NOTE(review): a ValueError raised here also results in
                # None rather than orig_frame - confirm that is intended.
                f = caller.read_var('f')
                frame = PyFrameObjectPtr.from_pyobject_ptr(f)
                if not frame.is_optimized_out():
                    return frame
            return orig_frame
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python-related code in the selected
        frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet
            return None

        while frame:
            if frame.is_python_frame():
                return frame
            frame = frame.older()

        # Not found:
        return None

    @classmethod
    def get_selected_bytecode_frame(cls):
        '''Try to obtain the Frame for the python bytecode interpreter in the
        selected GDB frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: handled like get_selected_python_frame() does, so
            # that callers get the documented None
            return None

        while frame:
            if frame.is_evalframe():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a "#<index> ..." summary of this frame to sys.stdout,
        followed by the current source line when it is available'''
        if self.is_evalframe():
            pyop = self.get_pyop()
            if pyop:
                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
            else:
                sys.stdout.write('#%i\n' % self.get_index())

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout,
        followed by the current source line when it is available'''
        if self.is_evalframe():
            pyop = self.get_pyop()
            if pyop:
                pyop.print_traceback()
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('  (unable to read python frame information)\n')
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('  %s\n' % info)
            else:
                sys.stdout.write('  (not a python frame)\n')
1704
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''
    # NOTE: per gdb's Python API documentation the class docstring above is
    # used as the help text for "py-list", so its wording is user-facing.

    def __init__(self):
        super(PyList, self).__init__("py-list",
                                     gdb.COMMAND_FILES,
                                     gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print a numbered window of source for the selected bytecode frame.

        args is the argument string given to "py-list": empty to show the
        five lines either side of the current line, "START" to show START
        through START + 10, or "START, END" to show that closed range.
        from_tty is not used.

        The current line is marked with a leading ">".  Failure to locate or
        read the frame, or to open the source file, is reported with a
        message rather than an exception.
        '''
        import itertools
        import re

        start = None
        end = None

        # A single pattern handles both argument forms; the second group is
        # only set when ", END" was supplied.
        m = re.match(r'\s*(\d+)\s*(?:,\s*(\d+))?', args)
        if m:
            # group(1) is the number itself; group(0) would be the whole
            # match, surrounding whitespace included.
            start = int(m.group(1))
            if m.group(2) is None:
                end = start + 10
            else:
                end = int(m.group(2))

        # py-list requires an actual PyEval_EvalFrameEx frame:
        frame = Frame.get_selected_bytecode_frame()
        if not frame:
            print('Unable to locate gdb frame for python bytecode interpreter')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            print('Unable to read python frame line number')
            return

        if start is None:
            # No explicit range: centre the window on the current line.
            start = lineno - 5
            end = lineno + 5

        if start < 1:
            start = 1

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            # start and end are 1-based and inclusive, while islice counts
            # from 0 and excludes its stop, so (start-1, end) selects exactly
            # the closed interval [start, end] without loading the rest of
            # the file into memory.
            wanted = itertools.islice(f, start - 1, end)
            for number, line in enumerate(wanted, start):
                linestr = str(number)
                # Highlight current line:
                if number == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1784
def move_in_stack(move_up):
    '''Select the nearest python frame above or below the selected one.

    Shared implementation of the py-up/py-down commands.  Starting from the
    selected python frame, step through older frames (move_up true) or newer
    frames (move_up false) until one reports is_python_frame(); that frame is
    selected and, if select() succeeds, summarised.  A message is printed
    when there is no starting frame or the stack runs out first.
    '''
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    while True:
        candidate = current.older() if move_up else current.newer()

        if not candidate:
            # Ran off the end of the stack without meeting a python frame.
            if move_up:
                print('Unable to find an older python frame')
            else:
                print('Unable to find a newer python frame')
            return

        if candidate.is_python_frame():
            # Found the target; only summarise it if it could be selected.
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate
1813
class PyUp(gdb.Command):
    """Select and print the python stack frame that called this one (if any)"""
    # NOTE: per gdb's Python API documentation the class docstring is used as
    # the help text for "py-up", so its wording is user-facing.

    def __init__(self):
        super(PyUp, self).__init__("py-up",
                                   gdb.COMMAND_STACK,
                                   gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Move towards older frames; both arguments are ignored.'''
        move_in_stack(move_up=True)
1825
class PyDown(gdb.Command):
    """Select and print the python stack frame called by this one (if any)"""
    # NOTE: per gdb's Python API documentation the class docstring is used as
    # the help text for "py-down", so its wording is user-facing.

    def __init__(self):
        super(PyDown, self).__init__("py-down",
                                     gdb.COMMAND_STACK,
                                     gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Move towards newer frames; both arguments are ignored.'''
        move_in_stack(move_up=False)
1837
# Not all builds of gdb have gdb.Frame.select, so py-up and py-down are only
# registered (by instantiating their command classes) when it is available:
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1842
class PyBacktraceFull(gdb.Command):
    """Display the current python frame and all the frames within its call stack (if any)"""
    # NOTE: per gdb's Python API documentation the class docstring is used as
    # the help text for "py-bt-full", so its wording is user-facing.

    def __init__(self):
        super(PyBacktraceFull, self).__init__("py-bt-full",
                                              gdb.COMMAND_STACK,
                                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print a summary of every python frame from the selected one outwards.

        Starts at the selected python frame and follows older() links until
        they run out, calling print_summary() on each frame that reports
        is_python_frame().  Both arguments are ignored.
        '''
        cursor = Frame.get_selected_python_frame()
        if not cursor:
            print('Unable to locate python frame')
            return

        while cursor:
            if cursor.is_python_frame():
                cursor.print_summary()
            cursor = cursor.older()


# Instantiating the class registers the command with gdb.
PyBacktraceFull()
1864
class PyBacktrace(gdb.Command):
    """Display the current python frame and all the frames within its call stack (if any)"""
    # NOTE: per gdb's Python API documentation the class docstring is used as
    # the help text for "py-bt", so its wording is user-facing.

    def __init__(self):
        super(PyBacktrace, self).__init__("py-bt",
                                          gdb.COMMAND_STACK,
                                          gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print a traceback-style listing from the selected python frame.

        Writes a header line, then follows older() links from the selected
        python frame until they run out, calling print_traceback() on each
        frame that reports is_python_frame().  Both arguments are ignored.
        '''
        cursor = Frame.get_selected_python_frame()
        if not cursor:
            print('Unable to locate python frame')
            return

        sys.stdout.write('Traceback (most recent call first):\n')
        while cursor:
            if cursor.is_python_frame():
                cursor.print_traceback()
            cursor = cursor.older()


# Instantiating the class registers the command with gdb.
PyBacktrace()
1887
class PyPrint(gdb.Command):
    """Look up the given python variable name, and print it"""
    # NOTE: per gdb's Python API documentation the class docstring is used as
    # the help text for "py-print", so its wording is user-facing.

    def __init__(self):
        super(PyPrint, self).__init__("py-print",
                                      gdb.COMMAND_DATA,
                                      gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print the variable named by args as seen from the selected python frame.

        The whole argument string is used as the variable name.  When the
        frame's get_var_by_name() finds it, the output is
        "<scope> '<name>' = <truncated repr>"; otherwise a "not found"
        message is printed.  from_tty is not used.
        '''
        name = str(args)

        selected = Frame.get_selected_python_frame()
        if not selected:
            print('Unable to locate python frame')
            return

        frame_obj = selected.get_pyop()
        if not frame_obj:
            print('Unable to read information on python frame')
            return

        value, scope = frame_obj.get_var_by_name(name)
        if not value:
            print('%r not found' % name)
            return

        shown = value.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, shown))


# Instantiating the class registers the command with gdb.
PyPrint()
1921
class PyLocals(gdb.Command):
    'Print the local variables of the current python frame'
    # NOTE: per gdb's Python API documentation the class docstring is used as
    # the help text for "py-locals".  It previously repeated py-print's text,
    # which described a different command.

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print "<name> = <truncated repr>" for each local of the selected python frame.

        The locals come from the frame object's iter_locals().  Both
        arguments are ignored: the command takes no variable name.  A message
        is printed instead when no python frame can be located or read.
        '''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()
1950