1#!/usr/bin/env python3
2
3"""\
4List python source files.
5
6There are three functions to check whether a file is a Python source, listed
7here with increasing complexity:
8
9- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
11  the '.py[w]' extension or the first line contains the word 'python'.
12- can_be_compiled() checks whether the file can be compiled by compile().
13
14The file also must be of appropriate size - not bigger than a megabyte.
15
16walk_python_files() recursively lists all Python files under the given directories.
17"""
__author__ = "Oleg Broytmann, Georg Brandl"

# Public names of this module: the three file checks and the directory walker.
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
21
22
23import os, re
24
# Matches any ASCII control byte (0x00-0x1F, 0x7F) other than 0x09-0x0D
# (tab, LF, VT, FF, CR).  looks_like_python() treats a first line that
# contains such a byte as a sign that the file is binary.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')

# When true, print_debug() prints its diagnostic messages.
debug = False
28
def print_debug(msg):
    """Print *msg*, but only when the module-level ``debug`` flag is true."""
    if not debug:
        return
    print(msg)
31
32
def _open(fullpath):
    """Open *fullpath* for binary reading, or return None if it is unsuitable.

    None is returned (after a debug message) when the file cannot be
    stat'ed, is not a regular file, is larger than one megabyte, or cannot
    be opened.  Otherwise the caller receives an open binary file object
    and is responsible for closing it.
    """
    import stat  # local import: only this helper needs the mode predicates

    try:
        info = os.stat(fullpath)
    except OSError as err: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    # Special files must be rejected before open(): opening a FIFO blocks
    # until a writer shows up, and a device node may report a size of 0 yet
    # produce an unbounded amount of data when read.
    if not stat.S_ISREG(info.st_mode):
        print_debug("%s: not a regular file" % fullpath)
        return None

    size = info.st_size
    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, "rb")
    except OSError as err: # Access denied - ignore the file
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None
49
def has_python_ext(fullpath):
    """Return True if the file name *fullpath* ends in '.py' or '.pyw'."""
    python_suffixes = (".py", ".pyw")
    return fullpath.endswith(python_suffixes)
52
def looks_like_python(fullpath):
    """Return True if *fullpath* looks like a Python source file.

    Only the first line of the file is read.  The file is rejected when it
    cannot be opened by _open() or when that line contains a byte matched
    by binary_re.  Otherwise it is accepted if its name ends in '.py' or
    '.pyw', or if the first line contains the bytes b"python".
    """
    handle = _open(fullpath)
    if handle is None:
        return False

    with handle:
        first_line = handle.readline()

    if binary_re.search(first_line) is not None:
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Either a Python extension, or a disguised Python script (e.g. CGI)
    # whose first line mentions python.
    return has_python_ext(fullpath) or b"python" in first_line
73
def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile as Python code.

    The whole file is read as bytes and passed to compile() in "exec" mode.
    False is returned when _open() refuses the file or when compile()
    raises any Exception (the error is reported through print_debug()).
    """
    handle = _open(fullpath)
    if handle is None:
        return False

    with handle:
        source = handle.read()

    try:
        compile(source, fullpath, "exec")
    except Exception as err:
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False
    else:
        return True
89
90
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: a list of directory base names that should be excluded in
                  the search

    Entries of paths that are neither a file nor a directory are skipped.
    """
    if exclude_dirs is None:
        exclude_dirs = []

    for path in paths:
        print_debug("testing: %s" % path)

        if os.path.isfile(path):
            # A plain file is checked directly.
            if is_python(path):
                yield path
            continue

        if not os.path.isdir(path):
            print_debug("    unknown type")
            continue

        print_debug("    it is a directory")
        for root, subdirs, files in os.walk(path):
            # Prune excluded names from the list os.walk() handed out; the
            # removal is done in place so the walk does not descend into them.
            for skipped in exclude_dirs:
                if skipped in subdirs:
                    subdirs.remove(skipped)

            for basename in files:
                candidate = os.path.join(root, basename)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
122
123
if __name__ == "__main__":
    # Two simple examples/tests, both run on the current directory.

    # First with the default check (looks_like_python) ...
    for found in walk_python_files(['.']):
        print(found)

    print("----------")

    # ... then keeping only the files that compile() accepts.
    for compilable in walk_python_files(['.'], is_python=can_be_compiled):
        print(compilable)
131