1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4"""File wrangling."""
5
6import fnmatch
7import ntpath
8import os
9import os.path
10import posixpath
11import re
12import sys
13
14from coverage import env
15from coverage.backward import unicode_class
16from coverage.misc import contract, CoverageException, join_regex, isolate_module
17
18
19os = isolate_module(os)
20
21
def set_relative_directory():
    """Make the current directory the base used by `relative_filename`.

    Rebinds two module globals: `RELATIVE_DIR` becomes the case-normalized
    absolute current directory with a trailing separator, and
    `CANONICAL_FILENAME_CACHE` is replaced by a new empty dict.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute current directory, always ending in a separator so that it
    # can be used as a string prefix.
    here = abs_file(os.curdir) + os.sep
    RELATIVE_DIR = os.path.normcase(here)

    # Fresh memo for canonical_filename(); earlier results are dropped.
    CANONICAL_FILENAME_CACHE = {}
32
33
def relative_directory():
    """Return the base directory currently stored in `RELATIVE_DIR`.

    This is the prefix that `relative_filename` strips from file names.

    """
    base_dir = RELATIVE_DIR
    return base_dir
37
38
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` with the relative directory prefix removed.

    The prefix is the directory recorded by the most recent call to
    `set_relative_directory`.  The comparison is done on the case-normalized
    name; a file name that does not start with the prefix is returned as-is
    (converted by `unicode_filename`).

    """
    prefix = RELATIVE_DIR
    if os.path.normcase(filename).startswith(prefix):
        # Slice the original spelling, not the case-normalized one.
        filename = filename[len(prefix):]
    return unicode_filename(filename)
51
52
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    An absolute path with no redundant components and normalized case.

    A relative `filename` is looked for in the current directory and then in
    each entry of `sys.path`; the first candidate that exists is the one that
    gets canonicalized.  If no candidate exists, `filename` itself is passed
    to `abs_file`.

    Results are memoized in `CANONICAL_FILENAME_CACHE` under the `filename`
    that was passed in, so repeated calls with the same argument skip the
    file system search.

    """
    if filename not in CANONICAL_FILENAME_CACHE:
        # Keep `filename` untouched: it is the cache key.  Rebinding it to the
        # located candidate would store the result under a different key and
        # the cache would never hit for relative names.
        found = filename
        if not os.path.isabs(filename):
            for path in [os.curdir] + sys.path:
                if path is None:
                    continue
                candidate = os.path.join(path, filename)
                if os.path.exists(candidate):
                    found = candidate
                    break
        CANONICAL_FILENAME_CACHE[filename] = abs_file(found)
    return CANONICAL_FILENAME_CACHE[filename]
72
73
def flat_rootname(filename):
    """Return a flat, directory-free base name derived from `filename`.

    Handy when files describing source code are all written into a single
    directory and same-named sources from different directories must still
    get distinct names.

    Any Windows drive prefix is dropped, then every backslash, slash, dot
    and colon becomes an underscore: a/b/c.py gives 'a_b_c_py'.

    """
    _drive, rest = ntpath.splitdrive(filename)
    return re.sub(r"[\\/.:]", "_", rest)
86
87
if env.WINDOWS:

    # Memo of actual_path() results, keyed by the path that was asked about.
    _ACTUAL_PATH_CACHE = {}
    # Memo of directory listings, keyed by the (already corrected) directory.
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` spelled with the case found in directory listings.

        The path is resolved one component at a time, from the root down:
        each component is replaced by the directory entry that equals it
        after `os.path.normcase`.  A component with no such entry is kept as
        given.  A directory that cannot be listed is treated as empty.

        """
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        try:
            return _ACTUAL_PATH_CACHE[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator, so `parent` is the drive
            # spec: normalize it to upper case.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            entries = _ACTUAL_PATH_LIST_CACHE.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    entries = []
                _ACTUAL_PATH_LIST_CACHE[parent] = entries
            wanted = os.path.normcase(leaf)
            for entry in entries:
                if os.path.normcase(entry) == wanted:
                    leaf = entry
                    break
            resolved = os.path.join(parent, leaf)
        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(filename):
        """Return `filename` unchanged: no case fix-up off Windows."""
        return filename
129
130
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text.

        A byte string is decoded with the file system encoding (falling back
        to the default encoding), replacing undecodable bytes.  Anything else
        is returned untouched.

        """
        if not isinstance(filename, str):
            return filename
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(encoding, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text: it is returned as given."""
        return filename
144
145
@contract(returns='unicode')
def abs_file(filename):
    """Return the absolute, normalized, Unicode form of `filename`.

    Steps, in order: expand `~` and environment variables, resolve symbolic
    links and make the path absolute, pass it through `actual_path`, and
    finally convert it with `unicode_filename`.

    """
    expanded = os.path.expandvars(os.path.expanduser(filename))
    resolved = os.path.abspath(os.path.realpath(expanded))
    return unicode_filename(actual_path(resolved))
154
155
# Module-level state.  Both names start out as None and are given their real
# values right away by set_relative_directory().
RELATIVE_DIR = CANONICAL_FILENAME_CACHE = None
set_relative_directory()
159
160
def isabs_anywhere(filename):
    """Return True if `filename` is absolute under Windows or POSIX rules."""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
164
165
def prep_patterns(patterns):
    """Return `patterns` made ready for a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (`*` or `?`) is kept
    exactly as given.  Every other pattern is made absolute, relative to the
    current directory, with `abs_file`.

    `patterns` may be None, in which case the result is an empty list.

    """
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns or []
    ]
183
184
class TreeMatcher(object):
    """Matches file paths that lie inside any of a set of directory trees."""
    def __init__(self, directories):
        # Private list copy of the tree roots, in the order given.
        self.dirs = list(directories)

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """Return the tree roots, as strings to show when dumping state."""
        return self.dirs

    def match(self, fpath):
        """Return True if `fpath` is one of our roots or a file below one."""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # Either the very same path, or the root followed immediately
            # by a separator.  A root that is merely a string prefix of a
            # longer sibling name does not count.
            if fpath == root or fpath[len(root)] == os.sep:
                return True
        return False
208
209
class ModuleMatcher(object):
    """Matches dotted module names that belong to any of a set of packages."""
    def __init__(self, module_names):
        # Private list copy of the module/package names, in the order given.
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % (self.modules)

    def info(self):
        """Return the module names, as strings to show when dumping state."""
        return self.modules

    def match(self, module_name):
        """Return True if `module_name` is one of our modules or inside one.

        An empty or None `module_name` never matches.

        """
        if not module_name:
            return False

        for package in self.modules:
            if not module_name.startswith(package):
                continue
            # Exactly the module, or the package name followed by a dot.
            if module_name == package or module_name[len(package)] == '.':
                return True

        return False
236
237
class FnmatchMatcher(object):
    """A matcher for files by file name pattern."""
    def __init__(self, pats):
        """Compile the fnmatch-style patterns `pats` into one regex.

        `pats` is copied with a slice, so it must be a sequence.  The
        translated patterns are combined with `join_regex`.

        """
        self.pats = pats[:]
        # The translated patterns only match the separator that was written
        # in the pattern, so make every "/" accept "\" as well.
        fnpats = (fnmatch.translate(p) for p in pats)
        fnpats = (self._match_either_slash(p) for p in fnpats)
        # Windows is also case-insensitive.  Use a compile flag rather than
        # an inline "(?i)" appended to the pattern: a global inline flag that
        # is not at the start of the expression is rejected by newer `re`.
        flags = re.IGNORECASE if env.WINDOWS else 0
        self.re = re.compile(join_regex(fnpats), flags=flags)

    @staticmethod
    def _match_either_slash(regex):
        """Return `regex` with each literal slash matching `/` or `\\`.

        `fnmatch.translate` writes a slash as `\\/` on older Pythons and as a
        bare `/` on newer ones; both spellings are handled.  The regex is
        scanned token by token so that an escaped backslash followed by a
        slash is not mistaken for an escaped slash, and slashes inside a
        bracketed character class are left alone.

        """
        def fix_token(match):
            token = match.group(0)
            if token in ("/", r"\/"):
                return r"[\\/]"
            return token

        # Alternatives, in order: an escaped character, a whole character
        # class, a bare slash.
        return re.sub(
            r"(?s)\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|/", fix_token, regex
        )

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our file name patterns?"""
        return self.re.match(fpath) is not None
264
265
def sep(s):
    """Return the first path separator (`/` or `\\`) found in `s`.

    If `s` contains neither, `os.sep` is returned.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
274
275
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # Tuples of (compiled regex, result root, pattern separator,
        # result separator), in the order they were added.
        self.aliases = []

    @staticmethod
    def _match_either_slash(regex):
        """Return `regex` with each literal slash matching `/` or `\\`.

        `fnmatch.translate` writes a slash as `\\/` on older Pythons and as a
        bare `/` on newer ones; both spellings are handled.  The regex is
        scanned token by token so that an escaped backslash followed by a
        slash is not mistaken for an escaped slash, and slashes inside a
        bracketed character class are left alone.

        """
        def fix_token(match):
            token = match.group(0)
            if token in ("/", r"\/"):
                return r"[\\/]"
            return token

        # Alternatives, in order: an escaped character, a whole character
        # class, a bare slash.
        return re.sub(
            r"(?s)\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|/", fix_token, regex
        )

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch anchors it at the end of
        # the string, which we don't want because only a prefix should match.
        # Depending on the Python version the anchor is followed by a flags
        # group ("\Z(?ms)") or is the very last thing ("(?s:...)\Z", or with
        # a lower-case z), so remove it in either position.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.
        regex_pat = self._match_either_slash(regex_pat)
        # We want case-insensitive matching, so add that flag.
        regex = re.compile(r"(?i)" + regex_pat)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched root.  A plain str.replace() would
                # also rewrite any later repetition of the same text.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                new = canonical_filename(new)
                return new
        return path
355
356
def find_python_files(dirname):
    """Yield the importable Python files under `dirname`, recursively.

    `dirname` itself is taken on trust: it was presumably named directly, so
    it does not need an __init__.py.  Every directory below it does; one
    without an __init__.py is skipped together with its whole subtree.

    """
    # Reasonable-looking Python file names only: they end with .py or .pyw
    # and contain none of the funny characters that usually mean editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if at_top:
            at_top = False
        elif '__init__.py' not in filenames:
            # Not a package: neither it nor anything below it is importable.
            # Emptying the list in place stops os.walk from descending.
            dirnames[:] = []
            continue
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)
379