# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt

"""File wrangling."""

import fnmatch
import ntpath
import os
import os.path
import posixpath
import re
import sys

from coverage import env
from coverage.backward import unicode_class
from coverage.misc import contract, CoverageException, join_regex, isolate_module


os = isolate_module(os)


def set_relative_directory():
    """Set the directory that `relative_filename` will be relative to.

    Also resets the cache used by `canonical_filename`.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # The absolute path to our current directory.
    RELATIVE_DIR = os.path.normcase(abs_file(os.curdir) + os.sep)

    # Cache of results of calling the canonical_filename() method, to
    # avoid duplicating work.
    CANONICAL_FILENAME_CACHE = {}


def relative_directory():
    """Return the directory that `relative_filename` is relative to."""
    return RELATIVE_DIR


@contract(returns='unicode')
def relative_filename(filename):
    """Return the relative form of `filename`.

    The file name will be relative to the current directory when the
    `set_relative_directory` was called.

    If `filename` is not inside that directory, it is returned unchanged
    (apart from conversion by `unicode_filename`).

    """
    # Compare case-normalized forms, but slice the original so the caller's
    # spelling of the remainder is preserved.
    fnorm = os.path.normcase(filename)
    if fnorm.startswith(RELATIVE_DIR):
        filename = filename[len(RELATIVE_DIR):]
    return unicode_filename(filename)


@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    An absolute path with no redundant components and normalized case.

    A relative `filename` is looked for in the current directory and then in
    each `sys.path` entry; the first existing candidate is used.  Results are
    cached under the name the caller passed in.

    """
    if filename not in CANONICAL_FILENAME_CACHE:
        # Resolve into a separate variable: the cache must be keyed by the
        # name we were given, or relative names would never hit the cache.
        resolved = filename
        if not os.path.isabs(filename):
            for path in [os.curdir] + sys.path:
                if path is None:
                    continue
                f = os.path.join(path, filename)
                if os.path.exists(f):
                    resolved = f
                    break
        CANONICAL_FILENAME_CACHE[filename] = abs_file(resolved)
    return CANONICAL_FILENAME_CACHE[filename]


def flat_rootname(filename):
    """A base for a flat file name to correspond to this file.

    Useful for writing files about the code where you want all the files in
    the same directory, but need to differentiate same-named files from
    different directories.

    For example, the file a/b/c.py will return 'a_b_c_py'

    """
    # Drop any Windows drive spec, then flatten separators, dots and colons.
    name = ntpath.splitdrive(filename)[1]
    return re.sub(r"[\\/.:]", "_", name)


if env.WINDOWS:

    # Memo of path -> actual path, and of directory -> its listing.
    _ACTUAL_PATH_CACHE = {}
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Get the actual path of `path`, including the correct case."""
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        if path in _ACTUAL_PATH_CACHE:
            return _ACTUAL_PATH_CACHE[path]

        head, tail = os.path.split(path)
        if not tail:
            # This means head is the drive spec: normalize it.
            actpath = head.upper()
        elif not head:
            actpath = tail
        else:
            # Fix the case of the parent first, then find how this last
            # component is really spelled in the parent's listing.
            head = actual_path(head)
            if head in _ACTUAL_PATH_LIST_CACHE:
                files = _ACTUAL_PATH_LIST_CACHE[head]
            else:
                try:
                    files = os.listdir(head)
                except OSError:
                    # Can't list the directory: keep `tail` as given.
                    files = []
                _ACTUAL_PATH_LIST_CACHE[head] = files
            normtail = os.path.normcase(tail)
            for f in files:
                if os.path.normcase(f) == normtail:
                    tail = f
                    break
            actpath = os.path.join(head, tail)
        _ACTUAL_PATH_CACHE[path] = actpath
        return actpath

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms."""
        return filename


if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return a Unicode version of `filename`."""
        if isinstance(filename, str):
            # Undecodable bytes are replaced rather than raising.
            encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
            filename = filename.decode(encoding, "replace")
        return filename
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return a Unicode version of `filename`."""
        return filename


@contract(returns='unicode')
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    Expands `~` and environment variables, resolves symlinks, makes the path
    absolute, corrects its case with `actual_path`, and converts it with
    `unicode_filename`.

    """
    path = os.path.expandvars(os.path.expanduser(filename))
    path = os.path.abspath(os.path.realpath(path))
    path = actual_path(path)
    path = unicode_filename(path)
    return path


# Module state, initialized at import time by set_relative_directory().
RELATIVE_DIR = None
CANONICAL_FILENAME_CACHE = None
set_relative_directory()


def isabs_anywhere(filename):
    """Is `filename` an absolute path on any OS?"""
    return ntpath.isabs(filename) or posixpath.isabs(filename)


def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    If a pattern starts with a wildcard, it is used as a pattern
    as-is.  If it does not start with a wildcard, then it is made
    absolute with the current directory.

    If `patterns` is None, an empty list is returned.

    """
    prepped = []
    for p in patterns or []:
        if p.startswith(("*", "?")):
            prepped.append(p)
        else:
            prepped.append(abs_file(p))
    return prepped


class TreeMatcher(object):
    """A matcher for files in a tree."""
    def __init__(self, directories):
        self.dirs = list(directories)

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.dirs

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?"""
        for d in self.dirs:
            if fpath.startswith(d):
                if fpath == d:
                    # This is the same file!
                    return True
                # fpath is strictly longer than d here, so the index is safe.
                if fpath[len(d)] == os.sep:
                    # This is a file in the directory
                    return True
        return False


class ModuleMatcher(object):
    """A matcher for modules in a tree."""
    def __init__(self, module_names):
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % (self.modules)

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.modules

    def match(self, module_name):
        """Does `module_name` indicate a module in one of our packages?"""
        if not module_name:
            return False

        for m in self.modules:
            if module_name.startswith(m):
                if module_name == m:
                    return True
                # module_name is strictly longer than m here.
                if module_name[len(m)] == '.':
                    # This is a module in the package
                    return True

        return False


def _either_slash(regex_pat):
    """Make each forward slash in `regex_pat` match either path separator.

    `regex_pat` is a regex produced by `fnmatch.translate`.  Depending on the
    Python version, a literal "/" arrives either escaped with a backslash or
    bare, so both spellings are rewritten.

    """
    # NOTE: this does not special-case a "/" inside a [...] character class;
    # such a pattern will not survive the rewrite intact.
    return re.sub(r"\\?/", lambda _: r"[\\/]", regex_pat)


class FnmatchMatcher(object):
    """A matcher for files by file name pattern."""
    def __init__(self, pats):
        self.pats = pats[:]
        # fnmatch is platform-specific. On Windows, it does the Windows thing
        # of treating / and \ as equivalent. But on other platforms, we need to
        # take care of that ourselves.
        fnpats = (fnmatch.translate(p) for p in pats)
        fnpats = (_either_slash(p) for p in fnpats)
        flags = 0
        if env.WINDOWS:
            # Windows is also case-insensitive.  Pass the flag to compile()
            # instead of appending "(?i)": inline global flags that are not
            # at the start of the expression are rejected by newer Pythons.
            flags |= re.IGNORECASE
        self.re = re.compile(join_regex(fnpats), flags=flags)

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our file name patterns?"""
        return self.re.match(fpath) is not None


def sep(s):
    """Find the path separator used in this string, or os.sep if none."""
    sep_match = re.search(r"[\\/]", s)
    if sep_match:
        the_sep = sep_match.group(0)
    else:
        the_sep = os.sep
    return the_sep


class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # List of (compiled regex, result root, pattern sep, result sep).
        self.aliases = []

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `CoverageException` if `pattern` ends with a wildcard.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always adds an end-of-string
        # anchor to match the whole string, which we don't want, so remove
        # it.  Depending on the Python version the anchor is either followed
        # by a flags group or is the last thing in the regex, so only strip
        # it in those two positions, to keep from destroying a literal
        # backslash-Z in the pattern.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r'\\[Zz](\(|$)', r'\1', regex_pat)

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.
        regex_pat = _either_slash(regex_pat)
        # We want case-insensitive matching, so add that flag.
        regex = re.compile(regex_pat, flags=re.IGNORECASE)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched prefix.  A plain str.replace() would
                # also rewrite any later repetition of the same text.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                new = canonical_filename(new)
                return new
        return path


def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    To be importable, the files have to be in a directory with a __init__.py,
    except for `dirname` itself, which isn't required to have one.  The
    assumption is that `dirname` was specified directly, so the user knows
    best, but sub-directories are checked for a __init__.py to be sure we only
    find the importable files.

    """
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dirname)):
        if i > 0 and '__init__.py' not in filenames:
            # If a directory doesn't have __init__.py, then it isn't
            # importable and neither are its files.  Emptying `dirnames` in
            # place stops os.walk from descending any further.
            del dirnames[:]
            continue
        for filename in filenames:
            # We're only interested in files that look like reasonable Python
            # files: Must end with .py or .pyw, and must not have certain funny
            # characters that probably mean they are editor junk.
            if re.match(r"^[^.#~!$@%^&*()+=,]+\.pyw?$", filename):
                yield os.path.join(dirpath, filename)