1#! /usr/bin/env python
2
3"""Show file statistics by extension."""
4
5from __future__ import print_function
6
7import os
8import sys
9
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label to a dict of ``counter name -> int``.
    A row label is either a normalized file extension (e.g. ``".py"``) or
    one of the pseudo-extensions used by this class: ``"<none>"``,
    ``"<dir>"``, ``"<lnk>"`` and ``"<???>"``.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively, regular files are analysed, and
        anything else is reported on stderr and counted under ``"<???>"``.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        Entries are visited in sorted order.  CVS temp files (``.#*``) and
        Emacs backups (``*~``) are skipped; symbolic links are counted but
        never followed.  An unlistable directory is reported on stderr.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = sorted(os.listdir(dir))
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in names:
            # Skip CVS temp files and Emacs backup files.
            if name.startswith(".#") or name.endswith("~"):
                continue
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        The file is counted under its case-normalized extension, or under
        ``"<none>"`` when it has none.  Its size is always recorded; files
        containing a NUL byte are counted as binary and get no line/word
        counts.  A file that cannot be opened is reported on stderr.
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.basename(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            f = open(filename, "rb")
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        # The with-block guarantees the handle is closed even if read() fails.
        with f:
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as a right-aligned table.

        One row is printed per extension (sorted), followed by a ``TOTAL``
        row, with a header line both above and below.  The method does not
        modify ``self.stats``, so it may be called repeatedly, and it also
        works when no statistics have been collected yet.
        """
        exts = sorted(self.stats)
        # The column set is the union of all counter names seen in any row.
        cols = sorted(set(col for ext in exts for col in self.stats[ext]))
        minwidth = 6
        # The label column must also fit its own header and the TOTAL label.
        labelwidth = max(len(label) for label in exts + ["TOTAL", "ext"])
        # Totals live in a local dict rather than in self.stats so that a
        # later report() call does not treat TOTAL as another extension.
        totals = {}
        colwidth = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            width = max(minwidth, len(col), len(str(total)))
            for value in values:
                width = max(width, len("%d" % value))
            colwidth[col] = width
            totals[col] = total

        def printheader():
            print("%*s" % (labelwidth, "ext"), end=" ")
            for col in cols:
                print("%*s" % (colwidth[col], col), end=" ")
            print()

        def printrow(label, row):
            print("%*s" % (labelwidth, label), end=" ")
            for col in cols:
                print("%*s" % (colwidth[col], row.get(col, "")), end=" ")
            print()

        printheader()
        for ext in exts:
            printrow(ext, self.stats[ext])
        printrow("TOTAL", totals)
        printheader()  # Another header at the bottom
120
def main():
    """Gather statistics for the command-line paths and print the report.

    When no paths are given on the command line, the current directory is
    used instead.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()
128
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
131