1"""Common operations on Posix pathnames.
2
3Instead of importing this module directly, import os and refer to
4this module as os.path.  The "os.path" name is an alias for this
5module on Posix systems; on other systems (e.g. Mac, Windows),
6os.path provides the same operations in a manner specific to that
7platform, and is an alias to another module (e.g. macpath, ntpath).
8
9Some of this can actually be useful on non-Posix systems too, e.g.
10for manipulation of the pathname component of URLs.
11"""
12
13import os
14import sys
15import stat
16import genericpath
17import warnings
18from genericpath import *
19from genericpath import _unicode
20
# Public names exported by "from posixpath import *".
__all__ = [
    # pure string manipulation
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix",
    # filesystem queries
    "getsize", "getmtime", "getatime", "getctime",
    "islink", "exists", "lexists", "isdir", "isfile", "ismount",
    # traversal, expansion and normalization
    "walk", "expanduser", "expandvars", "normpath", "abspath",
    # file identity
    "samefile", "sameopenfile", "samestat",
    # constants
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull",
    # later additions
    "realpath", "supports_unicode_filenames", "relpath",
]
28
# Strings representing various path-related bits and pieces.
curdir = '.'                # name of the current directory
pardir = '..'               # name of the parent directory
extsep = '.'                # separates a file name from its extension
sep = '/'                   # separates pathname components
pathsep = ':'               # separates entries in a search path
defpath = ':/bin:/usr/bin'  # default search path
altsep = None               # no alternative component separator here
devnull = '/dev/null'       # path of the null device
38
39# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
40# On MS-DOS this may also turn slashes into backslashes; however, other
41# normalizations (such as optimizing '../' away) are not allowed
42# (another function should be defined to do that).
43
def normcase(s):
    """Normalize the case of a pathname.

    Has no effect under Posix: the argument is handed back as is."""
    unchanged = s
    return unchanged
47
48
49# Return whether a path is absolute.
50# Trivial in Posix, harder on the Mac or MS-DOS.
51
def isabs(s):
    """Return True if the pathname is absolute, i.e. begins with a slash."""
    root = '/'
    return s.startswith(root)
55
56
57# Join pathnames.
58# Ignore the previous parts if a part is absolute.
59# Insert a '/' unless the first part is empty or already ends in '/'.
60
def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.

    A component that is itself absolute throws away everything joined so
    far.  An empty final component leaves the result ending in a
    separator."""
    joined = a
    for part in p:
        if part.startswith('/'):
            # Absolute component: restart from it.
            joined = part
            continue
        # A separator is only needed after a non-empty prefix that does
        # not already end in one.
        if joined and not joined.endswith('/'):
            joined += '/'
        joined += part
    return joined
75
76
# Split a path in head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing slashes are stripped from head unless it is the root.
81
def split(p):
    """Split a pathname into the tuple "(head, tail)".

    "tail" is everything after the final slash and "head" is what comes
    before it; either part may be empty.  Trailing slashes are removed
    from "head" unless it consists of slashes only."""
    cut = p.rfind('/') + 1
    head = p[:cut]
    tail = p[cut:]
    # rstrip() leaves an empty string exactly when head is empty or made
    # up solely of slashes; in that case head is kept untouched.
    trimmed = head.rstrip('/')
    if trimmed:
        head = trimmed
    return head, tail
90
91
92# Split a path in root and extension.
93# The extension is everything starting at the last dot in the last
94# pathname component; the root is everything before that.
95# It is always true that root + ext == p.
96
def splitext(p):
    # Delegate to the generic implementation, supplying this module's
    # separator constants.
    root_and_ext = genericpath._splitext(p, sep, altsep, extsep)
    return root_and_ext
# Reuse the generic helper's docstring for the public function.
splitext.__doc__ = genericpath._splitext.__doc__
100
101# Split a pathname into a drive specification and the rest of the
102# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
103
def splitdrive(p):
    """Split a pathname into the tuple "(drive, path)".

    On Posix the drive is always the empty string, so the second item
    is the whole pathname."""
    drive = ''
    return drive, p
108
109
110# Return the tail (basename) part of a path, same as split(path)[1].
111
def basename(p):
    """Return the final component of a pathname.

    This is whatever follows the last slash, or the whole pathname when
    it contains no slash."""
    _head, _slash, tail = p.rpartition('/')
    return tail
116
117
118# Return the head (dirname) part of a path, same as split(path)[0].
119
def dirname(p):
    """Return the directory component of a pathname.

    This is everything up to the last slash, with trailing slashes
    removed unless the result consists of slashes only."""
    head = p[:p.rfind('/') + 1]
    # An empty rstrip() result means head was empty or all slashes, in
    # which case it is returned as is.
    return head.rstrip('/') or head
127
128
129# Is a path a symbolic link?
130# This will always return false on systems where os.lstat doesn't exist.
131
def islink(path):
    """Return True if path is a symbolic link.

    False is returned when the path cannot be lstat'ed, including when
    os.lstat is not available at all."""
    try:
        info = os.lstat(path)
    except (os.error, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
139
140# Being true for dangling symbolic links is also useful.
141
def lexists(path):
    """Return True if path exists, without following a final symlink.

    Because os.lstat is used, a broken symbolic link counts as
    existing."""
    try:
        os.lstat(path)
    except os.error:
        found = False
    else:
        found = True
    return found
149
150
151# Are two filenames really pointing to the same file?
152
def samefile(f1, f2):
    """Return True if both pathnames refer to the same actual file.

    Each pathname is passed to os.stat (f1 first) and the two results
    are compared with samestat(); errors from os.stat propagate."""
    return samestat(os.stat(f1), os.stat(f2))
158
159
160# Are two open files really referencing the same file?
161# (Not necessarily the same file descriptor!)
162
def sameopenfile(fp1, fp2):
    """Return True if two open files refer to the same actual file.

    fp1 and fp2 are handed to os.fstat, so they are file descriptors;
    the two results are compared with samestat()."""
    return samestat(os.fstat(fp1), os.fstat(fp2))
168
169
170# Are two stat buffers (obtained from stat, fstat or lstat)
171# describing the same file?
172
def samestat(s1, s2):
    """Return True if two stat buffers describe the same file.

    Two buffers match when both their i-node number (st_ino) and their
    device (st_dev) are equal."""
    if s1.st_ino != s2.st_ino:
        return False
    return s1.st_dev == s2.st_dev
177
178
179# Is a path a mount point?
180# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
181
def ismount(path):
    """Return True if path is a mount point.

    A symbolic link is never reported as a mount point, and a path that
    cannot be lstat'ed yields False."""
    if islink(path):
        # A symlink can never be a mount point
        return False
    try:
        here = os.lstat(path)
        parent = os.lstat(realpath(join(path, '..')))
    except os.error:
        # It doesn't exist -- so not a mount point :-)
        return False
    # A mount point either lives on a different device than path/.., or
    # path/.. is the very same i-node as path.
    return here.st_dev != parent.st_dev or here.st_ino == parent.st_ino
201
202
203# Directory tree walk.
204# For each directory under top (including top itself, but excluding
205# '.' and '..'), func(arg, dirname, filenames) is called, where
206# dirname is the name of the directory and filenames is the list
207# of files (and subdirectories etc.) in the directory.
208# The func may modify the filenames list, to implement a filter,
209# or to impose a different order of visiting.
210
def walk(top, func, arg):
    """Walk a directory tree, invoking a callback for every directory.

    For top and each directory below it, func(arg, dirname, fnames) is
    called, where dirname is the directory's name and fnames is the list
    returned by os.listdir for it.  func may edit fnames in place (e.g.
    via del or slice assignment); only the names still in the list are
    examined afterwards, so this can be used to filter the walk or to
    impose a visiting order.  arg is passed through to func untouched;
    passing None is common.

    Directories that cannot be listed and entries that cannot be
    lstat'ed are silently skipped.  Symbolic links are not followed,
    since entries are classified with os.lstat."""
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        entries = os.listdir(top)
    except os.error:
        return
    func(arg, top, entries)
    # Iterate the very list handed to func so in-place edits take effect.
    for entry in entries:
        child = join(top, entry)
        try:
            mode = os.lstat(child).st_mode
        except os.error:
            continue
        if stat.S_ISDIR(mode):
            walk(child, func, arg)
240
241
242# Expand paths beginning with '~' or '~user'.
243# '~' means $HOME; '~user' means that user's home directory.
244# If the path doesn't begin with '~', or if the user or $HOME is unknown,
245# the path is returned unchanged (leaving error reporting to whatever
246# function is called with the expanded path as argument).
247# See also module 'glob' for expansion of *, ? and [...] in pathnames.
248# (A function should also be defined to do full *sh-style environment
249# variable expansion.)
250
def expanduser(path):
    """Expand ~ and ~user constructions.

    path -- the pathname to expand.

    A leading '~' is replaced by $HOME or, when $HOME is not set, by the
    current user's home directory from the password database.  A leading
    '~user' is replaced by that user's home directory.  Trailing slashes
    are stripped from the home directory before the remainder of the
    path is appended; an empty result becomes '/'.

    The path is returned unchanged if it does not start with '~', or if
    the user or home directory is unknown."""
    if not path.startswith('~'):
        return path
    # i marks the end of the '~' or '~user' prefix.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' not in os.environ:
            import pwd
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no entry in the password database:
                # leave the path alone, just as for an unknown '~user'.
                return path
        else:
            userhome = os.environ['HOME']
    else:
        import pwd
        try:
            pwent = pwd.getpwnam(path[1:i])
        except KeyError:
            # Unknown user name: leave the path alone.
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # A home directory of '/' strips down to ''; fall back to '/' when
    # nothing is left.
    return (userhome + path[i:]) or '/'
274
275
276# Expand paths containing shell variable substitutions.
277# This expands the forms $variable and ${variable} only.
278# Non-existent variables are left unchanged.
279
280_varprog = None
281_uvarprog = None
282
283def expandvars(path):
284    """Expand shell variables of form $var and ${var}.  Unknown variables
285    are left unchanged."""
286    global _varprog, _uvarprog
287    if '$' not in path:
288        return path
289    if isinstance(path, _unicode):
290        if not _uvarprog:
291            import re
292            _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE)
293        varprog = _uvarprog
294        encoding = sys.getfilesystemencoding()
295    else:
296        if not _varprog:
297            import re
298            _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
299        varprog = _varprog
300        encoding = None
301    i = 0
302    while True:
303        m = varprog.search(path, i)
304        if not m:
305            break
306        i, j = m.span(0)
307        name = m.group(1)
308        if name.startswith('{') and name.endswith('}'):
309            name = name[1:-1]
310        if encoding:
311            name = name.encode(encoding)
312        if name in os.environ:
313            tail = path[j:]
314            value = os.environ[name]
315            if encoding:
316                value = value.decode(encoding)
317            path = path[:i] + value
318            i = len(path)
319            path += tail
320        else:
321            i = j
322    return path
323
324
325# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
326# It should be understood that this may change the meaning of the path
327# if it contains symbolic links!
328
def normpath(path):
    """Normalize path, eliminating double slashes, '.' and '..' parts.

    An empty path, or one that normalizes to nothing, yields '.'.  The
    result is unicode when the argument is."""
    # Preserve unicode (if path is unicode)
    if isinstance(path, _unicode):
        slash, dot = u'/', u'.'
    else:
        slash, dot = '/', '.'
    if path == '':
        return dot
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if not path.startswith('/'):
        leading = 0
    elif path.startswith('//') and not path.startswith('///'):
        leading = 2
    else:
        leading = 1
    kept = []
    for comp in path.split('/'):
        if comp in ('', '.'):
            continue
        if comp != '..':
            kept.append(comp)
        elif kept and kept[-1] != '..':
            # '..' cancels the preceding ordinary component.
            kept.pop()
        elif kept or not leading:
            # A relative path keeps any '..' it cannot cancel.
            kept.append(comp)
        # Otherwise '..' directly under the root is dropped.
    result = slash * leading + slash.join(kept)
    return result or dot
356
357
def abspath(path):
    """Return a normalized, absolute version of path.

    A relative path is joined onto the current working directory, which
    is fetched as unicode when path is unicode."""
    if isabs(path):
        return normpath(path)
    if isinstance(path, _unicode):
        base = os.getcwdu()
    else:
        base = os.getcwd()
    return normpath(join(base, path))
367
368
369# Return a canonical path (i.e. the absolute location of a file on the
370# filesystem).
371
def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
    # Only the resolved path matters here; the success flag returned by
    # _joinrealpath is ignored.
    resolved = _joinrealpath('', filename, {})[0]
    return abspath(resolved)
377
378# Join two paths, normalizing and eliminating any symbolic links
379# encountered in the second path.
def _joinrealpath(path, rest, seen):
    # Returns a (path, ok) tuple; ok is False when a symlink loop was hit.
    # seen maps each symlink path to its resolved target, or to None
    # while that link is still being resolved.
    if isabs(rest):
        path = sep
        rest = rest[1:]

    while rest:
        component, _, rest = rest.partition(sep)
        if not component or component == curdir:
            # current dir
            continue
        if component == pardir:
            # parent dir
            if not path:
                path = pardir
            else:
                path, last = split(path)
                if last == pardir:
                    # Cannot go above a '..': put it back and add another.
                    path = join(path, pardir, pardir)
            continue
        candidate = join(path, component)
        if not islink(candidate):
            path = candidate
            continue
        # Resolve the symbolic link
        if candidate in seen:
            # Already seen this path
            path = seen[candidate]
            if path is None:
                # The symlink is not resolved, so we must have a symlink
                # loop.  Return already resolved part + rest of the path
                # unchanged.
                return join(candidate, rest), False
            # use cached value
            continue
        seen[candidate] = None # not resolved symlink
        path, resolved = _joinrealpath(path, os.readlink(candidate), seen)
        if not resolved:
            return join(path, rest), False
        seen[candidate] = path # resolved symlink

    return path, True
420
421
# True only when running on the 'darwin' platform.
supports_unicode_filenames = sys.platform == 'darwin'
423
def relpath(path, start=curdir):
    """Return a relative version of a path.

    The result leads from start (the current directory by default) to
    path; both are made absolute first.  '.' is returned when the two
    coincide.  Raises ValueError if path is empty."""

    if not path:
        raise ValueError("no path specified")

    origin_parts = [part for part in abspath(start).split(sep) if part]
    target_parts = [part for part in abspath(path).split(sep) if part]

    # Number of leading components shared by both locations.
    shared = len(commonprefix([origin_parts, target_parts]))

    # Climb out of what is left of start, then descend into path.
    climb = [pardir] * (len(origin_parts) - shared)
    steps = climb + target_parts[shared:]
    if steps:
        return join(*steps)
    return curdir
440