"""Common operations on Posix pathnames.

Instead of importing this module directly, import os and refer to
this module as os.path.  The "os.path" name is an alias for this
module on Posix systems; on other systems (e.g. Mac, Windows),
os.path provides the same operations in a manner specific to that
platform, and is an alias to another module (e.g. macpath, ntpath).

Some of this can actually be useful on non-Posix systems too, e.g.
for manipulation of the pathname component of URLs.
"""

import os
import sys
import stat
import genericpath
import warnings
from genericpath import *

__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount","walk","expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath"]

# strings representing various path-related bits and pieces
curdir = '.'
pardir = '..'
extsep = '.'
sep = '/'
pathsep = ':'
defpath = ':/bin:/usr/bin'
altsep = None
devnull = '/dev/null'

# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
# (another function should be defined to do that).

def normcase(s):
    """Normalize case of pathname.  Has no effect under Posix.

    The argument s is returned unchanged.
    """
    return s


# Return whether a path is absolute.
# Trivial in Posix, harder on the Mac or MS-DOS.

def isabs(s):
    """Test whether a path is absolute.

    Returns True when s begins with a '/', False otherwise.
    """
    return s.startswith('/')


# Join pathnames.
# Ignore the previous parts if a part is absolute.
# Insert a '/' unless the first part is empty or already ends in '/'.
def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.

    If any component is an absolute path, all previous path components
    will be discarded.  A '/' is inserted between components unless the
    path built so far is empty or already ends in '/'.

    Returns the joined path.
    """
    path = a
    for b in p:
        if b.startswith('/'):
            # An absolute component restarts the result.
            path = b
        elif path == '' or path.endswith('/'):
            path += b
        else:
            path += '/' + b
    return path


# Split a path in head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.

def split(p):
    """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
    everything after the final slash.  Either part may be empty.

    Trailing slashes are stripped from head unless head consists only of
    slashes.
    """
    i = p.rfind('/') + 1
    head, tail = p[:i], p[i:]
    if head and head != '/'*len(head):
        head = head.rstrip('/')
    return head, tail


# Split a path in root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.

def splitext(p):
    # Delegates to the shared implementation; the docstring is copied from
    # it just below.
    return genericpath._splitext(p, sep, altsep, extsep)
splitext.__doc__ = genericpath._splitext.__doc__

# Split a pathname into a drive specification and the rest of the
# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.

def splitdrive(p):
    """Split a pathname into drive and path.  On Posix, drive is always
    empty.

    Returns the tuple ('', p).
    """
    return '', p


# Return the tail (basename) part of a path, same as split(path)[1].

def basename(p):
    """Returns the final component of a pathname.

    This is everything after the last '/' in p (the whole of p when it
    contains no '/').
    """
    i = p.rfind('/') + 1
    return p[i:]


# Return the head (dirname) part of a path, same as split(path)[0].
def dirname(p):
    """Returns the directory component of a pathname.

    This is everything up to the last '/' in p, with trailing slashes
    stripped unless the result consists only of slashes.  It is empty when
    p contains no '/'.
    """
    i = p.rfind('/') + 1
    head = p[:i]
    if head and head != '/'*len(head):
        head = head.rstrip('/')
    return head


# Is a path a symbolic link?
# This will always return false on systems where os.lstat doesn't exist.

def islink(path):
    """Test whether a path is a symbolic link.

    Returns False when os.lstat() fails or is not available.
    """
    try:
        st = os.lstat(path)
    except (os.error, AttributeError):
        return False
    return stat.S_ISLNK(st.st_mode)

# Being true for dangling symbolic links is also useful.

def lexists(path):
    """Test whether a path exists.  Returns True for broken symbolic links.

    Uses os.lstat(), so the link itself is examined, not its target.
    """
    try:
        os.lstat(path)
    except os.error:
        return False
    return True


# Are two filenames really pointing to the same file?

def samefile(f1, f2):
    """Test whether two pathnames reference the same actual file.

    Both paths are passed to os.stat(); any error it raises propagates to
    the caller.
    """
    s1 = os.stat(f1)
    s2 = os.stat(f2)
    return samestat(s1, s2)


# Are two open files really referencing the same file?
# (Not necessarily the same file descriptor!)

def sameopenfile(fp1, fp2):
    """Test whether two open file objects reference the same file.

    Both arguments are passed straight to os.fstat().
    """
    s1 = os.fstat(fp1)
    s2 = os.fstat(fp2)
    return samestat(s1, s2)


# Are two stat buffers (obtained from stat, fstat or lstat)
# describing the same file?

def samestat(s1, s2):
    """Test whether two stat buffers reference the same file.

    Two buffers match when both their st_ino and st_dev fields are equal.
    """
    return s1.st_ino == s2.st_ino and \
           s1.st_dev == s2.st_dev


# Is a path a mount point?
# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
def ismount(path):
    """Test whether a path is a mount point.

    Returns True when path and path/.. are on different devices, or when
    they are the same i-node.  Returns False for symbolic links and for
    paths that cannot be lstat()ed.
    """
    if islink(path):
        # A symlink can never be a mount point
        return False
    try:
        s1 = os.lstat(path)
        s2 = os.lstat(join(path, '..'))
    except os.error:
        return False # It doesn't exist -- so not a mount point :-)
    dev1 = s1.st_dev
    dev2 = s2.st_dev
    if dev1 != dev2:
        return True     # path/.. on a different device as path
    ino1 = s1.st_ino
    ino2 = s2.st_ino
    if ino1 == ino2:
        return True     # path/.. is the same i-node as path
    return False


# Directory tree walk.
# For each directory under top (including top itself, but excluding
# '.' and '..'), func(arg, dirname, filenames) is called, where
# dirname is the name of the directory and filenames is the list
# of files (and subdirectories etc.) in the directory.
# The func may modify the filenames list, to implement a filter,
# or to impose a different order of visiting.

def walk(top, func, arg):
    """Directory tree walk with callback function.

    For each directory in the directory tree rooted at top (including top
    itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
    dirname is the name of the directory, and fnames a list of the names of
    the files and subdirectories in dirname (excluding '.' and '..').  func
    may modify the fnames list in-place (e.g. via del or slice assignment),
    and walk will only recurse into the subdirectories whose names remain in
    fnames; this can be used to implement a filter, or to impose a specific
    order of visiting.  No semantics are defined for, or required of, arg,
    beyond that arg is always passed to func.  It can be used, e.g., to pass
    a filename pattern, or a mutable object designed to accumulate
    statistics.  Passing None for arg is common."""
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        names = os.listdir(top)
    except os.error:
        # Unlistable directories are silently skipped.
        return
    func(arg, top, names)
    for name in names:
        name = join(top, name)
        try:
            # lstat: symbolic links to directories are not followed.
            st = os.lstat(name)
        except os.error:
            continue
        if stat.S_ISDIR(st.st_mode):
            walk(name, func, arg)


# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    do nothing.

    A leading '~' is replaced by $HOME, or by the password database entry
    for the current uid when $HOME is not set; a leading '~user' is
    replaced by that user's home directory.  Trailing slashes on the home
    directory are dropped, so a home directory of '/' turns '~/x' into
    '/x' rather than '//x'.
    """
    if not path.startswith('~'):
        return path
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' not in os.environ:
            import pwd
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no password database entry: honour
                # the "do nothing" contract instead of raising.
                return path
        else:
            userhome = os.environ['HOME']
    else:
        import pwd
        try:
            pwent = pwd.getpwnam(path[1:i])
        except KeyError:
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # An all-slash home directory strips to '', so path[i:] supplies the
    # single leading '/'; a bare '~' then falls back to the root itself.
    return (userhome + path[i:]) or '/'


# Expand paths containing shell variable substitutions.
# This expands the forms $variable and ${variable} only.
# Non-existent variables are left unchanged.

_varprog = None

def expandvars(path):
    """Expand shell variables of form $var and ${var}.  Unknown variables
    are left unchanged.

    Values are looked up in os.environ.  A substituted value is not
    rescanned for further variable references.
    """
    global _varprog
    if '$' not in path:
        return path
    if not _varprog:
        # Compiled lazily on first use and cached at module level.
        import re
        _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
    i = 0
    while True:
        m = _varprog.search(path, i)
        if not m:
            break
        i, j = m.span(0)
        name = m.group(1)
        if name.startswith('{') and name.endswith('}'):
            name = name[1:-1]
        if name in os.environ:
            tail = path[j:]
            path = path[:i] + os.environ[name]
            # Resume scanning after the inserted value.
            i = len(path)
            path += tail
        else:
            i = j
    return path


# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
# It should be understood that this may change the meaning of the path
# if it contains symbolic links!

def normpath(path):
    """Normalize path, eliminating double slashes, etc.

    Empty and '.' components are dropped and 'name/..' pairs are collapsed
    purely textually.  An empty result is returned as '.'.  The result is
    unicode when path is unicode.
    """
    # Preserve unicode (if path is unicode)
    slash, dot = (u'/', u'.') if isinstance(path, unicode) else ('/', '.')
    if path == '':
        return dot
    initial_slashes = path.startswith('/')
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if (initial_slashes and
        path.startswith('//') and not path.startswith('///')):
        initial_slashes = 2
    comps = path.split('/')
    new_comps = []
    for comp in comps:
        if comp in ('', '.'):
            continue
        # Keep '..' only when there is nothing to cancel it against: at the
        # start of a relative path, or after another kept '..'.
        if (comp != '..' or (not initial_slashes and not new_comps) or
             (new_comps and new_comps[-1] == '..')):
            new_comps.append(comp)
        elif new_comps:
            new_comps.pop()
    comps = new_comps
    path = slash.join(comps)
    if initial_slashes:
        path = slash*initial_slashes + path
    return path or dot


def abspath(path):
    """Return an absolute path.

    A relative path is joined onto the current working directory (fetched
    as unicode when path is unicode); the result is then normalized with
    normpath().
    """
    if not isabs(path):
        if isinstance(path, unicode):
            cwd = os.getcwdu()
        else:
            cwd = os.getcwd()
        path = join(cwd, path)
    return normpath(path)


# Return a canonical path (i.e. the absolute location of a file on the
# filesystem).
def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path.

    Each component is resolved in order, so a '..' that follows a symbolic
    link (in filename or inside a link target) is applied to the link's
    target rather than being collapsed textually.  If a symlink loop is
    found, the part resolved so far is returned with the remainder of the
    path appended unchanged.
    """
    path, ok = _joinrealpath('', filename, {})
    return abspath(path)


# Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path.

def _joinrealpath(path, rest, seen):
    """Internal helper function.  Resolve rest, component by component,
    relative to the already-resolved path.

    seen maps each symlink path visited so far to its resolved target, or
    to None while that link is still being resolved (which is how loops
    are detected).

    Returns (resolved_path, ok); ok is False when a symlink loop was hit.
    """
    if isabs(rest):
        rest = rest[1:]
        path = sep

    while rest:
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # Cannot go above an unresolved '..'; keep both.
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        if not islink(newpath):
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            # Return already resolved part + rest of the path unchanged.
            return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True


def _resolve_link(path):
    """Internal helper function.  Takes a path and follows symlinks
    until we either arrive at something that isn't a symlink, or
    encounter a path we've seen before (meaning that there's a loop).

    Returns the final path, or None when a loop is detected.  realpath()
    no longer calls this; it is kept so existing references keep working.
    """
    paths_seen = set()
    while islink(path):
        if path in paths_seen:
            # Already seen this path, so we must have a symlink loop
            return None
        paths_seen.add(path)
        # Resolve where the link points to
        resolved = os.readlink(path)
        if not isabs(resolved):
            parent = dirname(path)
            path = normpath(join(parent, resolved))
        else:
            path = normpath(resolved)
    return path

supports_unicode_filenames = (sys.platform == 'darwin')

def relpath(path, start=curdir):
    """Return a relative version of a path.

    The result leads from start (the current directory by default) to
    path.  Both are made absolute first; '.' is returned when they are the
    same location.

    Raises ValueError if path is empty.
    """

    if not path:
        raise ValueError("no path specified")

    start_list = [x for x in abspath(start).split(sep) if x]
    path_list = [x for x in abspath(path).split(sep) if x]

    # Work out how much of the filepath is shared by start and path.
    i = len(commonprefix([start_list, path_list]))

    # Climb out of the unshared part of start, then descend into path.
    rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    if not rel_list:
        return curdir
    return join(*rel_list)