1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4"""Raw data collector for coverage.py."""
5
6import os
7import sys
8
9from coverage import env
10from coverage.backward import iitems
11from coverage.files import abs_file
12from coverage.misc import CoverageException, isolate_module
13from coverage.pytracer import PyTracer
14
15os = isolate_module(os)
16
17
# Pick the tracer implementation at import time: prefer the compiled C
# extension, and fall back to "no C tracer" if it cannot be imported.
try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition   # pylint: disable=no-name-in-module
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code.  If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors. I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    # Mark the C tracer as unavailable.  Note that CFileDisposition is left
    # unbound on this path, so it must only be referenced when CTracer is set.
    CTracer = None
33
34
class FileDisposition(object):
    """A bare value object recording what should be done with a file.

    The class defines no attributes or methods of its own; whatever needs to
    be recorded about a file is set as attributes on an instance.

    """
38
39
40class Collector(object):
41    """Collects trace data.
42
43    Creates a Tracer object for each thread, since they track stack
44    information.  Each Tracer points to the same shared data, contributing
45    traced data points.
46
47    When the Collector is started, it creates a Tracer for the current thread,
48    and installs a function to create Tracers for each new thread started.
49    When the Collector is stopped, all active Tracers are stopped.
50
51    Threads started while the Collector is stopped will never have Tracers
52    associated with them.
53
54    """
55
56    # The stack of active Collectors.  Collectors are added here when started,
57    # and popped when stopped.  Collectors on the stack are paused when not
58    # the top, and resumed when they become the top again.
59    _collectors = []
60
61    def __init__(self, should_trace, check_include, timid, branch, warn, concurrency):
62        """Create a collector.
63
64        `should_trace` is a function, taking a file name, and returning a
65        `coverage.FileDisposition object`.
66
67        `check_include` is a function taking a file name and a frame. It returns
68        a boolean: True if the file should be traced, False if not.
69
70        If `timid` is true, then a slower simpler trace function will be
71        used.  This is important for some environments where manipulation of
72        tracing functions make the faster more sophisticated trace function not
73        operate properly.
74
75        If `branch` is true, then branches will be measured.  This involves
76        collecting data on which statements followed each other (arcs).  Use
77        `get_arc_data` to get the arc data.
78
79        `warn` is a warning function, taking a single string message argument,
80        to be used if a warning needs to be issued.
81
82        `concurrency` is a string indicating the concurrency library in use.
83        Valid values are "greenlet", "eventlet", "gevent", or "thread" (the
84        default).
85
86        """
87        self.should_trace = should_trace
88        self.check_include = check_include
89        self.warn = warn
90        self.branch = branch
91        self.threading = None
92        self.concurrency = concurrency
93
94        self.concur_id_func = None
95
96        try:
97            if concurrency == "greenlet":
98                import greenlet
99                self.concur_id_func = greenlet.getcurrent
100            elif concurrency == "eventlet":
101                import eventlet.greenthread     # pylint: disable=import-error,useless-suppression
102                self.concur_id_func = eventlet.greenthread.getcurrent
103            elif concurrency == "gevent":
104                import gevent                   # pylint: disable=import-error,useless-suppression
105                self.concur_id_func = gevent.getcurrent
106            elif concurrency == "thread" or not concurrency:
107                # It's important to import threading only if we need it.  If
108                # it's imported early, and the program being measured uses
109                # gevent, then gevent's monkey-patching won't work properly.
110                import threading
111                self.threading = threading
112            else:
113                raise CoverageException("Don't understand concurrency=%s" % concurrency)
114        except ImportError:
115            raise CoverageException(
116                "Couldn't trace with concurrency=%s, the module isn't installed." % concurrency
117            )
118
119        self.reset()
120
121        if timid:
122            # Being timid: use the simple Python trace function.
123            self._trace_class = PyTracer
124        else:
125            # Being fast: use the C Tracer if it is available, else the Python
126            # trace function.
127            self._trace_class = CTracer or PyTracer
128
129        if self._trace_class is CTracer:
130            self.file_disposition_class = CFileDisposition
131            self.supports_plugins = True
132        else:
133            self.file_disposition_class = FileDisposition
134            self.supports_plugins = False
135
136    def __repr__(self):
137        return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())
138
139    def tracer_name(self):
140        """Return the class name of the tracer we're using."""
141        return self._trace_class.__name__
142
143    def reset(self):
144        """Clear collected data, and prepare to collect more."""
145        # A dictionary mapping file names to dicts with line number keys (if not
146        # branch coverage), or mapping file names to dicts with line number
147        # pairs as keys (if branch coverage).
148        self.data = {}
149
150        # A dictionary mapping file names to file tracer plugin names that will
151        # handle them.
152        self.file_tracers = {}
153
154        # The .should_trace_cache attribute is a cache from file names to
155        # coverage.FileDisposition objects, or None.  When a file is first
156        # considered for tracing, a FileDisposition is obtained from
157        # Coverage.should_trace.  Its .trace attribute indicates whether the
158        # file should be traced or not.  If it should be, a plugin with dynamic
159        # file names can decide not to trace it based on the dynamic file name
160        # being excluded by the inclusion rules, in which case the
161        # FileDisposition will be replaced by None in the cache.
162        if env.PYPY:
163            import __pypy__                     # pylint: disable=import-error
164            # Alex Gaynor said:
165            # should_trace_cache is a strictly growing key: once a key is in
166            # it, it never changes.  Further, the keys used to access it are
167            # generally constant, given sufficient context. That is to say, at
168            # any given point _trace() is called, pypy is able to know the key.
169            # This is because the key is determined by the physical source code
170            # line, and that's invariant with the call site.
171            #
172            # This property of a dict with immutable keys, combined with
173            # call-site-constant keys is a match for PyPy's module dict,
174            # which is optimized for such workloads.
175            #
176            # This gives a 20% benefit on the workload described at
177            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
178            self.should_trace_cache = __pypy__.newdict("module")
179        else:
180            self.should_trace_cache = {}
181
182        # Our active Tracers.
183        self.tracers = []
184
185    def _start_tracer(self):
186        """Start a new Tracer object, and store it in self.tracers."""
187        tracer = self._trace_class()
188        tracer.data = self.data
189        tracer.trace_arcs = self.branch
190        tracer.should_trace = self.should_trace
191        tracer.should_trace_cache = self.should_trace_cache
192        tracer.warn = self.warn
193
194        if hasattr(tracer, 'concur_id_func'):
195            tracer.concur_id_func = self.concur_id_func
196        elif self.concur_id_func:
197            raise CoverageException(
198                "Can't support concurrency=%s with %s, only threads are supported" % (
199                    self.concurrency, self.tracer_name(),
200                )
201            )
202
203        if hasattr(tracer, 'file_tracers'):
204            tracer.file_tracers = self.file_tracers
205        if hasattr(tracer, 'threading'):
206            tracer.threading = self.threading
207        if hasattr(tracer, 'check_include'):
208            tracer.check_include = self.check_include
209
210        fn = tracer.start()
211        self.tracers.append(tracer)
212
213        return fn
214
215    # The trace function has to be set individually on each thread before
216    # execution begins.  Ironically, the only support the threading module has
217    # for running code before the thread main is the tracing function.  So we
218    # install this as a trace function, and the first time it's called, it does
219    # the real trace installation.
220
221    def _installation_trace(self, frame, event, arg):
222        """Called on new threads, installs the real tracer."""
223        # Remove ourselves as the trace function.
224        sys.settrace(None)
225        # Install the real tracer.
226        fn = self._start_tracer()
227        # Invoke the real trace function with the current event, to be sure
228        # not to lose an event.
229        if fn:
230            fn = fn(frame, event, arg)
231        # Return the new trace function to continue tracing in this scope.
232        return fn
233
234    def start(self):
235        """Start collecting trace information."""
236        if self._collectors:
237            self._collectors[-1].pause()
238
239        # Check to see whether we had a fullcoverage tracer installed. If so,
240        # get the stack frames it stashed away for us.
241        traces0 = []
242        fn0 = sys.gettrace()
243        if fn0:
244            tracer0 = getattr(fn0, '__self__', None)
245            if tracer0:
246                traces0 = getattr(tracer0, 'traces', [])
247
248        try:
249            # Install the tracer on this thread.
250            fn = self._start_tracer()
251        except:
252            if self._collectors:
253                self._collectors[-1].resume()
254            raise
255
256        # If _start_tracer succeeded, then we add ourselves to the global
257        # stack of collectors.
258        self._collectors.append(self)
259
260        # Replay all the events from fullcoverage into the new trace function.
261        for args in traces0:
262            (frame, event, arg), lineno = args
263            try:
264                fn(frame, event, arg, lineno=lineno)
265            except TypeError:
266                raise Exception("fullcoverage must be run with the C trace function.")
267
268        # Install our installation tracer in threading, to jump start other
269        # threads.
270        if self.threading:
271            self.threading.settrace(self._installation_trace)
272
273    def stop(self):
274        """Stop collecting trace information."""
275        assert self._collectors
276        assert self._collectors[-1] is self, (
277            "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
278        )
279
280        self.pause()
281        self.tracers = []
282
283        # Remove this Collector from the stack, and resume the one underneath
284        # (if any).
285        self._collectors.pop()
286        if self._collectors:
287            self._collectors[-1].resume()
288
289    def pause(self):
290        """Pause tracing, but be prepared to `resume`."""
291        for tracer in self.tracers:
292            tracer.stop()
293            stats = tracer.get_stats()
294            if stats:
295                print("\nCoverage.py tracer stats:")
296                for k in sorted(stats.keys()):
297                    print("%16s: %s" % (k, stats[k]))
298        if self.threading:
299            self.threading.settrace(None)
300
301    def resume(self):
302        """Resume tracing after a `pause`."""
303        for tracer in self.tracers:
304            tracer.start()
305        if self.threading:
306            self.threading.settrace(self._installation_trace)
307        else:
308            self._start_tracer()
309
310    def save_data(self, covdata):
311        """Save the collected data to a `CoverageData`.
312
313        Also resets the collector.
314
315        """
316        def abs_file_dict(d):
317            """Return a dict like d, but with keys modified by `abs_file`."""
318            return dict((abs_file(k), v) for k, v in iitems(d))
319
320        if self.branch:
321            covdata.add_arcs(abs_file_dict(self.data))
322        else:
323            covdata.add_lines(abs_file_dict(self.data))
324        covdata.add_file_tracers(abs_file_dict(self.file_tracers))
325
326        self.reset()
327