1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4"""Coverage data for coverage.py."""
5
6import glob
7import json
8import optparse
9import os
10import os.path
11import random
12import re
13import socket
14
15from coverage import env
16from coverage.backward import iitems, string_class
17from coverage.debug import _TEST_NAME_FILE
18from coverage.files import PathAliases
19from coverage.misc import CoverageException, file_be_gone, isolate_module
20
21os = isolate_module(os)
22
23
class CoverageData(object):
    """Manages collected coverage data, including file storage.

    This class is the public supported API to the data coverage.py collects
    during program execution.  It includes information about what code was
    executed. It does not include information from the analysis phase, to
    determine what lines could have been executed, or what lines were not
    executed.

    .. note::

        The file format is not documented or guaranteed.  It will change in
        the future, in possibly complicated ways.  Do not read coverage.py
        data files directly.  Use this API to avoid disruption.

    There are a number of kinds of data that can be collected:

    * **lines**: the line numbers of source lines that were executed.
      These are always available.

    * **arcs**: pairs of source and destination line numbers for transitions
      between source lines.  These are only available if branch coverage was
      used.

    * **file tracer names**: the module names of the file tracer plugins that
      handled each file in the data.

    * **run information**: information about the program execution.  This is
      written during "coverage run", and then accumulated during "coverage
      combine".

    Lines, arcs, and file tracer names are stored for each source file. File
    names in this API are case-sensitive, even on platforms with
    case-insensitive file systems.

    To read a coverage.py data file, use :meth:`read_file`, or
    :meth:`read_fileobj` if you have an already-opened file.  You can then
    access the line, arc, or file tracer data with :meth:`lines`, :meth:`arcs`,
    or :meth:`file_tracer`.  Run information is available with
    :meth:`run_infos`.

    The :meth:`has_arcs` method indicates whether arc data is available.  You
    can get a list of the files in the data with :meth:`measured_files`.
    A summary of the line data is available from :meth:`line_counts`.  As with
    most Python containers, you can determine if there is any data at all by
    using this object as a boolean value.


    Most data files will be created by coverage.py itself, but you can use
    methods here to create data files if you like.  The :meth:`add_lines`,
    :meth:`add_arcs`, and :meth:`add_file_tracers` methods add data, in ways
    that are convenient for coverage.py.  The :meth:`add_run_info` method adds
    key-value pairs to the run information.

    To add a file without any measured data, use :meth:`touch_file`.

    You write to a named file with :meth:`write_file`, or to an already opened
    file with :meth:`write_fileobj`.

    You can clear the data in memory with :meth:`erase`.  Two data collections
    can be combined by using :meth:`update` on one :class:`CoverageData`,
    passing it the other.

    """

    # The data file format is JSON, with these keys:
    #
    #     * lines: a dict mapping file names to lists of line numbers
    #       executed::
    #
    #         { "file1": [17,23,45], "file2": [1,2,3], ... }
    #
    #     * arcs: a dict mapping file names to lists of line number pairs::
    #
    #         { "file1": [[17,23], [17,25], [25,26]], ... }
    #
    #     * file_tracers: a dict mapping file names to plugin names::
    #
    #         { "file1": "django.coverage", ... }
    #
    #     * runs: a list of dicts of information about the coverage.py runs
    #       contributing to the data::
    #
    #         [ { "brief_sys": "CPython 2.7.10 Darwin" }, ... ]
    #
    # Only one of `lines` or `arcs` will be present: with branch coverage, data
    # is stored as arcs. Without branch coverage, it is stored as lines.  The
    # line data is easily recovered from the arcs: it is all the first elements
    # of the pairs that are greater than zero.

    def __init__(self, debug=None):
        """Create a CoverageData.

        `debug` is a `DebugControl` object for writing debug messages.

        """
        self._debug = debug

        # A map from canonical Python source file name to a dictionary in
        # which there's an entry for each line number that has been
        # executed:
        #
        #   { 'filename1.py': [12, 47, 1001], ... }
        #
        self._lines = None

        # A map from canonical Python source file name to a dictionary with an
        # entry for each pair of line numbers forming an arc:
        #
        #   { 'filename1.py': [(12,14), (47,48), ... ], ... }
        #
        self._arcs = None

        # A map from canonical source file name to a plugin module name:
        #
        #   { 'filename1.py': 'django.coverage', ... }
        #
        self._file_tracers = {}

        # A list of dicts of information about the coverage.py runs.
        self._runs = []

    def __repr__(self):
        return "<{klass} lines={lines} arcs={arcs} tracers={tracers} runs={runs}>".format(
            klass=self.__class__.__name__,
            lines="None" if self._lines is None else "{{{0}}}".format(len(self._lines)),
            arcs="None" if self._arcs is None else "{{{0}}}".format(len(self._arcs)),
            tracers="{{{0}}}".format(len(self._file_tracers)),
            runs="[{0}]".format(len(self._runs)),
        )

    ##
    ## Reading data
    ##

    def has_arcs(self):
        """Does this data have arcs?

        Arc data is only available if branch coverage was used during
        collection.

        Returns a boolean.

        """
        return self._has_arcs()

    def lines(self, filename):
        """Get the list of lines executed for a file.

        If the file was not measured, returns None.  A file might be measured,
        and have no lines executed, in which case an empty list is returned.

        If the file was measured, returns a list of integers, the line numbers
        executed in the file. The list is in no particular order.

        """
        if self._arcs is not None:
            # With arc data, the executed lines are the positive starting
            # line numbers of the arcs; negatives are entry/exit markers.
            if filename in self._arcs:
                return [s for s, __ in self._arcs[filename] if s > 0]
        elif self._lines is not None:
            if filename in self._lines:
                return self._lines[filename]
        return None

    def arcs(self, filename):
        """Get the list of arcs executed for a file.

        If the file was not measured, returns None.  A file might be measured,
        and have no arcs executed, in which case an empty list is returned.

        If the file was executed, returns a list of 2-tuples of integers. Each
        pair is a starting line number and an ending line number for a
        transition from one line to another. The list is in no particular
        order.

        Negative numbers have special meaning.  If the starting line number is
        -N, it represents an entry to the code object that starts at line N.
        If the ending line number is -N, it's an exit from the code object that
        starts at line N.

        """
        if self._arcs is not None:
            if filename in self._arcs:
                return self._arcs[filename]
        return None

    def file_tracer(self, filename):
        """Get the plugin name of the file tracer for a file.

        Returns the name of the plugin that handles this file.  If the file was
        measured, but didn't use a plugin, then "" is returned.  If the file
        was not measured, then None is returned.

        """
        # Because the vast majority of files involve no plugin, we don't store
        # them explicitly in self._file_tracers.  Check the measured data
        # instead to see if it was a known file with no plugin.
        if filename in (self._arcs or self._lines or {}):
            return self._file_tracers.get(filename, "")
        return None

    def run_infos(self):
        """Return the list of dicts of run information.

        For data collected during a single run, this will be a one-element
        list.  If data has been combined, there will be one element for each
        original data file.

        """
        return self._runs

    def measured_files(self):
        """A list of all files that had been measured."""
        return list(self._arcs or self._lines or {})

    def line_counts(self, fullpath=False):
        """Return a dict summarizing the line coverage data.

        Keys are based on the file names, and values are the number of executed
        lines.  If `fullpath` is true, then the keys are the full pathnames of
        the files, otherwise they are the basenames of the files.

        Returns a dict mapping file names to counts of lines.

        """
        summ = {}
        if fullpath:
            filename_fn = lambda f: f
        else:
            filename_fn = os.path.basename
        for filename in self.measured_files():
            summ[filename_fn(filename)] = len(self.lines(filename))
        return summ

    def __nonzero__(self):
        return bool(self._lines or self._arcs)

    # Python 3 uses __bool__ for truthiness; __nonzero__ is the Python 2 name.
    __bool__ = __nonzero__

    def read_fileobj(self, file_obj):
        """Read the coverage data from the given file object.

        Should only be used on an empty CoverageData object.

        """
        data = self._read_raw_data(file_obj)

        self._lines = self._arcs = None

        if 'lines' in data:
            self._lines = data['lines']
        if 'arcs' in data:
            # JSON can only represent pairs as two-element lists; convert
            # them back to the tuples used internally.
            self._arcs = dict(
                (fname, [tuple(pair) for pair in arcs])
                for fname, arcs in iitems(data['arcs'])
            )
        self._file_tracers = data.get('file_tracers', {})
        self._runs = data.get('runs', [])

        self._validate()

    def read_file(self, filename):
        """Read the coverage data from `filename` into this object."""
        if self._debug and self._debug.should('dataio'):
            self._debug.write("Reading data from %r" % (filename,))
        try:
            with self._open_for_reading(filename) as f:
                self.read_fileobj(f)
        except Exception as exc:
            raise CoverageException(
                "Couldn't read data from '%s': %s: %s" % (
                    filename, exc.__class__.__name__, exc,
                )
            )

    _GO_AWAY = "!coverage.py: This is a private format, don't read it directly!"

    @classmethod
    def _open_for_reading(cls, filename):
        """Open a file appropriately for reading data."""
        return open(filename, "r")

    @classmethod
    def _read_raw_data(cls, file_obj):
        """Read the raw data from a file object."""
        # The file starts with a fixed warning prologue; require it, so that
        # unrelated files aren't mistakenly parsed as coverage data.
        go_away = file_obj.read(len(cls._GO_AWAY))
        if go_away != cls._GO_AWAY:
            raise CoverageException("Doesn't seem to be a coverage.py data file")
        return json.load(file_obj)

    @classmethod
    def _read_raw_data_file(cls, filename):
        """Read the raw data from a file, for debugging."""
        with cls._open_for_reading(filename) as f:
            return cls._read_raw_data(f)

    ##
    ## Writing data
    ##

    def add_lines(self, line_data):
        """Add measured line data.

        `line_data` is a dictionary mapping file names to dictionaries::

            { filename: { lineno: None, ... }, ...}

        """
        if self._debug and self._debug.should('dataop'):
            self._debug.write("Adding lines: %d files, %d lines total" % (
                len(line_data), sum(len(lines) for lines in line_data.values())
            ))
        if self._has_arcs():
            raise CoverageException("Can't add lines to existing arc data")

        if self._lines is None:
            self._lines = {}
        for filename, linenos in iitems(line_data):
            if filename in self._lines:
                # Merge with existing data via a set to avoid duplicates.
                new_linenos = set(self._lines[filename])
                new_linenos.update(linenos)
                linenos = new_linenos
            self._lines[filename] = list(linenos)

        self._validate()

    def add_arcs(self, arc_data):
        """Add measured arc data.

        `arc_data` is a dictionary mapping file names to dictionaries::

            { filename: { (l1,l2): None, ... }, ...}

        """
        if self._debug and self._debug.should('dataop'):
            self._debug.write("Adding arcs: %d files, %d arcs total" % (
                len(arc_data), sum(len(arcs) for arcs in arc_data.values())
            ))
        if self._has_lines():
            raise CoverageException("Can't add arcs to existing line data")

        if self._arcs is None:
            self._arcs = {}
        for filename, arcs in iitems(arc_data):
            if filename in self._arcs:
                # Merge with existing data via a set to avoid duplicates.
                new_arcs = set(self._arcs[filename])
                new_arcs.update(arcs)
                arcs = new_arcs
            self._arcs[filename] = list(arcs)

        self._validate()

    def add_file_tracers(self, file_tracers):
        """Add per-file plugin information.

        `file_tracers` is { filename: plugin_name, ... }

        """
        if self._debug and self._debug.should('dataop'):
            self._debug.write("Adding file tracers: %d files" % (len(file_tracers),))

        existing_files = self._arcs or self._lines or {}
        for filename, plugin_name in iitems(file_tracers):
            if filename not in existing_files:
                raise CoverageException(
                    "Can't add file tracer data for unmeasured file '%s'" % (filename,)
                )
            existing_plugin = self._file_tracers.get(filename)
            if existing_plugin is not None and plugin_name != existing_plugin:
                raise CoverageException(
                    "Conflicting file tracer name for '%s': %r vs %r" % (
                        filename, existing_plugin, plugin_name,
                    )
                )
            self._file_tracers[filename] = plugin_name

        self._validate()

    def add_run_info(self, **kwargs):
        """Add information about the run.

        Keywords are arbitrary, and are stored in the run dictionary. Values
        must be JSON serializable.  You may use this function more than once,
        but repeated keywords overwrite each other.

        """
        if self._debug and self._debug.should('dataop'):
            self._debug.write("Adding run info: %r" % (kwargs,))
        if not self._runs:
            self._runs = [{}]
        self._runs[0].update(kwargs)
        self._validate()

    def touch_file(self, filename):
        """Ensure that `filename` appears in the data, empty if needed."""
        if self._debug and self._debug.should('dataop'):
            self._debug.write("Touching %r" % (filename,))
        if not self._has_arcs() and not self._has_lines():
            raise CoverageException("Can't touch files in an empty CoverageData")

        if self._has_arcs():
            where = self._arcs
        else:
            where = self._lines
        where.setdefault(filename, [])

        self._validate()

    def write_fileobj(self, file_obj):
        """Write the coverage data to `file_obj`."""

        # Create the file data.
        file_data = {}

        if self._has_arcs():
            file_data['arcs'] = self._arcs

        if self._has_lines():
            file_data['lines'] = self._lines

        if self._file_tracers:
            file_data['file_tracers'] = self._file_tracers

        if self._runs:
            file_data['runs'] = self._runs

        # Write the data to the file.
        file_obj.write(self._GO_AWAY)
        json.dump(file_data, file_obj)

    def write_file(self, filename):
        """Write the coverage data to `filename`."""
        if self._debug and self._debug.should('dataio'):
            self._debug.write("Writing data to %r" % (filename,))
        with open(filename, 'w') as fdata:
            self.write_fileobj(fdata)

    def erase(self):
        """Erase the data in this object."""
        self._lines = None
        self._arcs = None
        self._file_tracers = {}
        self._runs = []
        self._validate()

    def update(self, other_data, aliases=None):
        """Update this data with data from another `CoverageData`.

        If `aliases` is provided, it's a `PathAliases` object that is used to
        re-map paths to match the local machine's.

        """
        if self._has_lines() and other_data._has_arcs():
            raise CoverageException("Can't combine arc data with line data")
        if self._has_arcs() and other_data._has_lines():
            raise CoverageException("Can't combine line data with arc data")

        aliases = aliases or PathAliases()

        # _file_tracers: only have a string, so they have to agree.
        # Have to do these first, so that our examination of self._arcs and
        # self._lines won't be confused by data updated from other_data.
        for filename in other_data.measured_files():
            other_plugin = other_data.file_tracer(filename)
            filename = aliases.map(filename)
            this_plugin = self.file_tracer(filename)
            if this_plugin is None:
                if other_plugin:
                    self._file_tracers[filename] = other_plugin
            elif this_plugin != other_plugin:
                raise CoverageException(
                    "Conflicting file tracer name for '%s': %r vs %r" % (
                        filename, this_plugin, other_plugin,
                    )
                )

        # _runs: add the new runs to these runs.
        self._runs.extend(other_data._runs)

        # _lines: merge dicts.
        if other_data._has_lines():
            if self._lines is None:
                self._lines = {}
            for filename, file_lines in iitems(other_data._lines):
                filename = aliases.map(filename)
                if filename in self._lines:
                    lines = set(self._lines[filename])
                    lines.update(file_lines)
                    file_lines = list(lines)
                self._lines[filename] = file_lines

        # _arcs: merge dicts.
        if other_data._has_arcs():
            if self._arcs is None:
                self._arcs = {}
            for filename, file_arcs in iitems(other_data._arcs):
                filename = aliases.map(filename)
                if filename in self._arcs:
                    arcs = set(self._arcs[filename])
                    arcs.update(file_arcs)
                    file_arcs = list(arcs)
                self._arcs[filename] = file_arcs

        self._validate()

    ##
    ## Miscellaneous
    ##

    def _validate(self):
        """If we are in paranoid mode, validate that everything is right."""
        if env.TESTING:
            self._validate_invariants()

    def _validate_invariants(self):
        """Validate internal invariants."""
        # Only one of _lines or _arcs should exist.
        assert not (self._has_lines() and self._has_arcs()), (
            "Shouldn't have both _lines and _arcs"
        )

        # _lines should be a dict of lists of ints.
        if self._has_lines():
            for fname, lines in iitems(self._lines):
                assert isinstance(fname, string_class), "Key in _lines shouldn't be %r" % (fname,)
                assert all(isinstance(x, int) for x in lines), (
                    "_lines[%r] shouldn't be %r" % (fname, lines)
                )

        # _arcs should be a dict of lists of pairs of ints.
        if self._has_arcs():
            for fname, arcs in iitems(self._arcs):
                assert isinstance(fname, string_class), "Key in _arcs shouldn't be %r" % (fname,)
                assert all(isinstance(x, int) and isinstance(y, int) for x, y in arcs), (
                    "_arcs[%r] shouldn't be %r" % (fname, arcs)
                )

        # _file_tracers should have only non-empty strings as values.
        for fname, plugin in iitems(self._file_tracers):
            assert isinstance(fname, string_class), (
                "Key in _file_tracers shouldn't be %r" % (fname,)
            )
            assert plugin and isinstance(plugin, string_class), (
                "_file_tracers[%r] shouldn't be %r" % (fname, plugin)
            )

        # _runs should be a list of dicts.
        for val in self._runs:
            assert isinstance(val, dict)
            for key in val:
                assert isinstance(key, string_class), "Key in _runs shouldn't be %r" % (key,)

    def add_to_hash(self, filename, hasher):
        """Contribute `filename`'s data to the `hasher`.

        `hasher` is a `coverage.misc.Hasher` instance to be updated with
        the file's data.  It should only get the results data, not the run
        data.

        """
        if self._has_arcs():
            hasher.update(sorted(self.arcs(filename) or []))
        else:
            hasher.update(sorted(self.lines(filename) or []))
        hasher.update(self.file_tracer(filename))

    ##
    ## Internal
    ##

    def _has_lines(self):
        """Do we have data in self._lines?"""
        return self._lines is not None

    def _has_arcs(self):
        """Do we have data in self._arcs?"""
        return self._arcs is not None
601
602
class CoverageDataFiles(object):
    """Manage the use of coverage data files."""

    def __init__(self, basename=None, warn=None):
        """Create a CoverageDataFiles to manage data files.

        `warn` is the warning function to use.

        `basename` is the name of the file to use for storing data.

        """
        self.warn = warn
        # Resolve the storage file name to an absolute path once, up front,
        # so later directory changes can't affect where the data is stored.
        self.filename = os.path.abspath(basename or ".coverage")

    def erase(self, parallel=False):
        """Erase the data from the file storage.

        If `parallel` is true, then also deletes data files created from the
        basename by parallel-mode.

        """
        file_be_gone(self.filename)
        if not parallel:
            return
        # Parallel-mode files are named "<basename>.<suffix>", so remove
        # everything matching "<basename>.*" in the data directory.
        directory, base = os.path.split(self.filename)
        glob_pattern = os.path.join(os.path.abspath(directory), base + '.*')
        for parallel_file in glob.glob(glob_pattern):
            file_be_gone(parallel_file)

    def read(self, data):
        """Read the coverage data."""
        if not os.path.exists(self.filename):
            return
        data.read_file(self.filename)

    def write(self, data, suffix=None):
        """Write the collected coverage data to a file.

        `suffix` is a suffix to append to the base file name. This can be used
        for multiple or parallel execution, so that many coverage data files
        can exist simultaneously.  A dot will be used to join the base name and
        the suffix.

        """
        if suffix is True:
            # A bare True means "invent a distinguishing suffix".  Build it
            # at the last possible moment so the pid is correct even if the
            # process has forked since measurement started.
            extra = ""
            if _TEST_NAME_FILE:                             # pragma: debugging
                with open(_TEST_NAME_FILE) as f:
                    test_name = f.read()
                extra = "." + test_name
            suffix = "%s%s.%s.%06d" % (
                socket.gethostname(), extra, os.getpid(),
                random.randint(0, 999999)
            )

        filename = self.filename
        if suffix:
            filename += "." + suffix
        data.write_file(filename)

    def combine_parallel_data(self, data, aliases=None, data_paths=None):
        """Combine a number of data files together.

        Treat `self.filename` as a file prefix, and combine the data from all
        of the data files starting with that prefix plus a dot.

        If `aliases` is provided, it's a `PathAliases` object that is used to
        re-map paths to match the local machine's.

        If `data_paths` is provided, it is a list of directories or files to
        combine.  Directories are searched for files that start with
        `self.filename` plus dot as a prefix, and those files are combined.

        If `data_paths` is not provided, then the directory portion of
        `self.filename` is used as the directory to search for data files.

        Every data file found and combined is then deleted from disk. If a file
        cannot be read, a warning will be issued, and the file will not be
        deleted.

        """
        # self.filename was made absolute in the constructor, so data_dir is
        # never an empty string here.
        data_dir, local = os.path.split(self.filename)
        pattern_tail = local + '.*'

        candidates = []
        for path in (data_paths or [data_dir]):
            if os.path.isfile(path):
                candidates.append(os.path.abspath(path))
            elif os.path.isdir(path):
                glob_pattern = os.path.join(os.path.abspath(path), pattern_tail)
                candidates.extend(glob.glob(glob_pattern))
            else:
                raise CoverageException("Couldn't combine from non-existent path '%s'" % (path,))

        for data_file in candidates:
            partial = CoverageData()
            try:
                partial.read_file(data_file)
            except CoverageException as exc:
                if self.warn:
                    # The exception message already names the file, so pass
                    # it along as the warning text.
                    self.warn(str(exc))
            else:
                data.update(partial, aliases=aliases)
                file_be_gone(data_file)
716
717
def canonicalize_json_data(data):
    """Canonicalize our JSON data so it can be compared."""
    # Sort the per-file sequences in both sections, in place.
    for section in ('lines', 'arcs'):
        mapping = data.get(section, {})
        for fname in mapping:
            mapping[fname] = sorted(mapping[fname])
724
725
def pretty_data(data):
    """Format data as JSON, but as nicely as possible.

    Returns a string.

    """
    # Start from the standard indented JSON dump.
    text = json.dumps(data, indent=4, sort_keys=True)
    # Collapse two-element number pairs back onto a single line each.
    text = re.sub(r"\[\s+(-?\d+),\s+(-?\d+)\s+]", r"[\1, \2]", text)
    # Trailing whitespace messes with tests, so strip it from every line.
    return re.sub(r"(?m)\s+$", "", text)
739
740
def debug_main(args):
    """Dump the raw data from data files.

    Run this as::

        $ python -m coverage.data [FILE]

    """
    parser = optparse.OptionParser()
    parser.add_option(
        "-c", "--canonical", action="store_true",
        help="Sort data into a canonical order",
    )
    options, filenames = parser.parse_args(args)

    # With no file arguments, fall back to the default data file name.
    if not filenames:
        filenames = [".coverage"]
    for data_file in filenames:
        print("--- {0} ------------------------------".format(data_file))
        raw = CoverageData._read_raw_data_file(data_file)
        if options.canonical:
            canonicalize_json_data(raw)
        print(pretty_data(raw))
762
763
# Running this module directly dumps raw coverage data files for debugging:
#   $ python -m coverage.data [-c] [FILE ...]
if __name__ == '__main__':
    import sys
    debug_main(sys.argv[1:])
767