1#! /usr/bin/env python
2
3"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.
4
5tabnanny -- Detection of ambiguous indentation
6
7For the time being this module is intended to be called as a script.
8However it is possible to import it into an IDE and use the function
9check() described below.
10
11Warning: The API provided by this module is likely to change in future
12releases; such changes may not be backward compatible.
13"""
14
15# Released to the public domain, by Tim Peters, 15 April 1998.
16
17# XXX Note: this is now a standard library module.
18# XXX The API needs to undergo changes however; the current code is too
19# XXX script-like.  This will be addressed later.
20
21__version__ = "6"
22
23import os
24import sys
25import getopt
26import tokenize
27if not hasattr(tokenize, 'NL'):
28    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
29
30__all__ = ["check", "NannyNag", "process_tokens"]
31
32verbose = 0
33filename_only = 0
34
35def errprint(*args):
36    sep = ""
37    for arg in args:
38        sys.stderr.write(sep + str(arg))
39        sep = " "
40    sys.stderr.write("\n")
41
42def main():
43    global verbose, filename_only
44    try:
45        opts, args = getopt.getopt(sys.argv[1:], "qv")
46    except getopt.error, msg:
47        errprint(msg)
48        return
49    for o, a in opts:
50        if o == '-q':
51            filename_only = filename_only + 1
52        if o == '-v':
53            verbose = verbose + 1
54    if not args:
55        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
56        return
57    for arg in args:
58        check(arg)
59
60class NannyNag(Exception):
61    """
62    Raised by process_tokens() if detecting an ambiguous indent.
63    Captured and handled in check().
64    """
65    def __init__(self, lineno, msg, line):
66        self.lineno, self.msg, self.line = lineno, msg, line
67    def get_lineno(self):
68        return self.lineno
69    def get_msg(self):
70        return self.msg
71    def get_line(self):
72        return self.line
73
74def check(file):
75    """check(file_or_dir)
76
77    If file_or_dir is a directory and not a symbolic link, then recursively
78    descend the directory tree named by file_or_dir, checking all .py files
79    along the way. If file_or_dir is an ordinary Python source file, it is
80    checked for whitespace related problems. The diagnostic messages are
81    written to standard output using the print statement.
82    """
83
84    if os.path.isdir(file) and not os.path.islink(file):
85        if verbose:
86            print "%r: listing directory" % (file,)
87        names = os.listdir(file)
88        for name in names:
89            fullname = os.path.join(file, name)
90            if (os.path.isdir(fullname) and
91                not os.path.islink(fullname) or
92                os.path.normcase(name[-3:]) == ".py"):
93                check(fullname)
94        return
95
96    try:
97        f = open(file)
98    except IOError, msg:
99        errprint("%r: I/O Error: %s" % (file, msg))
100        return
101
102    if verbose > 1:
103        print "checking %r ..." % file
104
105    try:
106        process_tokens(tokenize.generate_tokens(f.readline))
107
108    except tokenize.TokenError, msg:
109        errprint("%r: Token Error: %s" % (file, msg))
110        return
111
112    except IndentationError, msg:
113        errprint("%r: Indentation Error: %s" % (file, msg))
114        return
115
116    except NannyNag, nag:
117        badline = nag.get_lineno()
118        line = nag.get_line()
119        if verbose:
120            print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
121            print "offending line: %r" % (line,)
122            print nag.get_msg()
123        else:
124            if ' ' in file: file = '"' + file + '"'
125            if filename_only: print file
126            else: print file, badline, repr(line)
127        return
128
129    if verbose:
130        print "%r: Clean bill of health." % (file,)
131
132class Whitespace:
133    # the characters used for space and tab
134    S, T = ' \t'
135
136    # members:
137    #   raw
138    #       the original string
139    #   n
140    #       the number of leading whitespace characters in raw
141    #   nt
142    #       the number of tabs in raw[:n]
143    #   norm
144    #       the normal form as a pair (count, trailing), where:
145    #       count
146    #           a tuple such that raw[:n] contains count[i]
147    #           instances of S * i + T
148    #       trailing
149    #           the number of trailing spaces in raw[:n]
150    #       It's A Theorem that m.indent_level(t) ==
151    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
152    #   is_simple
153    #       true iff raw[:n] is of the form (T*)(S*)
154
155    def __init__(self, ws):
156        self.raw  = ws
157        S, T = Whitespace.S, Whitespace.T
158        count = []
159        b = n = nt = 0
160        for ch in self.raw:
161            if ch == S:
162                n = n + 1
163                b = b + 1
164            elif ch == T:
165                n = n + 1
166                nt = nt + 1
167                if b >= len(count):
168                    count = count + [0] * (b - len(count) + 1)
169                count[b] = count[b] + 1
170                b = 0
171            else:
172                break
173        self.n    = n
174        self.nt   = nt
175        self.norm = tuple(count), b
176        self.is_simple = len(count) <= 1
177
178    # return length of longest contiguous run of spaces (whether or not
179    # preceding a tab)
180    def longest_run_of_spaces(self):
181        count, trailing = self.norm
182        return max(len(count)-1, trailing)
183
184    def indent_level(self, tabsize):
185        # count, il = self.norm
186        # for i in range(len(count)):
187        #    if count[i]:
188        #        il = il + (i/tabsize + 1)*tabsize * count[i]
189        # return il
190
191        # quicker:
192        # il = trailing + sum (i/ts + 1)*ts*count[i] =
193        # trailing + ts * sum (i/ts + 1)*count[i] =
194        # trailing + ts * sum i/ts*count[i] + count[i] =
195        # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
196        # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
197        # and note that i/ts*count[i] is 0 when i < ts
198
199        count, trailing = self.norm
200        il = 0
201        for i in range(tabsize, len(count)):
202            il = il + i/tabsize * count[i]
203        return trailing + tabsize * (il + self.nt)
204
205    # return true iff self.indent_level(t) == other.indent_level(t)
206    # for all t >= 1
207    def equal(self, other):
208        return self.norm == other.norm
209
210    # return a list of tuples (ts, i1, i2) such that
211    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
212    # Intended to be used after not self.equal(other) is known, in which
213    # case it will return at least one witnessing tab size.
214    def not_equal_witness(self, other):
215        n = max(self.longest_run_of_spaces(),
216                other.longest_run_of_spaces()) + 1
217        a = []
218        for ts in range(1, n+1):
219            if self.indent_level(ts) != other.indent_level(ts):
220                a.append( (ts,
221                           self.indent_level(ts),
222                           other.indent_level(ts)) )
223        return a
224
225    # Return True iff self.indent_level(t) < other.indent_level(t)
226    # for all t >= 1.
227    # The algorithm is due to Vincent Broman.
228    # Easy to prove it's correct.
229    # XXXpost that.
230    # Trivial to prove n is sharp (consider T vs ST).
231    # Unknown whether there's a faster general way.  I suspected so at
232    # first, but no longer.
233    # For the special (but common!) case where M and N are both of the
234    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
235    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
236    # XXXwrite that up.
237    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
238    def less(self, other):
239        if self.n >= other.n:
240            return False
241        if self.is_simple and other.is_simple:
242            return self.nt <= other.nt
243        n = max(self.longest_run_of_spaces(),
244                other.longest_run_of_spaces()) + 1
245        # the self.n >= other.n test already did it for ts=1
246        for ts in range(2, n+1):
247            if self.indent_level(ts) >= other.indent_level(ts):
248                return False
249        return True
250
251    # return a list of tuples (ts, i1, i2) such that
252    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
253    # Intended to be used after not self.less(other) is known, in which
254    # case it will return at least one witnessing tab size.
255    def not_less_witness(self, other):
256        n = max(self.longest_run_of_spaces(),
257                other.longest_run_of_spaces()) + 1
258        a = []
259        for ts in range(1, n+1):
260            if self.indent_level(ts) >= other.indent_level(ts):
261                a.append( (ts,
262                           self.indent_level(ts),
263                           other.indent_level(ts)) )
264        return a
265
266def format_witnesses(w):
267    firsts = map(lambda tup: str(tup[0]), w)
268    prefix = "at tab size"
269    if len(w) > 1:
270        prefix = prefix + "s"
271    return prefix + " " + ', '.join(firsts)
272
273def process_tokens(tokens):
274    INDENT = tokenize.INDENT
275    DEDENT = tokenize.DEDENT
276    NEWLINE = tokenize.NEWLINE
277    JUNK = tokenize.COMMENT, tokenize.NL
278    indents = [Whitespace("")]
279    check_equal = 0
280
281    for (type, token, start, end, line) in tokens:
282        if type == NEWLINE:
283            # a program statement, or ENDMARKER, will eventually follow,
284            # after some (possibly empty) run of tokens of the form
285            #     (NL | COMMENT)* (INDENT | DEDENT+)?
286            # If an INDENT appears, setting check_equal is wrong, and will
287            # be undone when we see the INDENT.
288            check_equal = 1
289
290        elif type == INDENT:
291            check_equal = 0
292            thisguy = Whitespace(token)
293            if not indents[-1].less(thisguy):
294                witness = indents[-1].not_less_witness(thisguy)
295                msg = "indent not greater e.g. " + format_witnesses(witness)
296                raise NannyNag(start[0], msg, line)
297            indents.append(thisguy)
298
299        elif type == DEDENT:
300            # there's nothing we need to check here!  what's important is
301            # that when the run of DEDENTs ends, the indentation of the
302            # program statement (or ENDMARKER) that triggered the run is
303            # equal to what's left at the top of the indents stack
304
305            # Ouch!  This assert triggers if the last line of the source
306            # is indented *and* lacks a newline -- then DEDENTs pop out
307            # of thin air.
308            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
309            check_equal = 1
310
311            del indents[-1]
312
313        elif check_equal and type not in JUNK:
314            # this is the first "real token" following a NEWLINE, so it
315            # must be the first token of the next program statement, or an
316            # ENDMARKER; the "line" argument exposes the leading whitespace
317            # for this statement; in the case of ENDMARKER, line is an empty
318            # string, so will properly match the empty string with which the
319            # "indents" stack was seeded
320            check_equal = 0
321            thisguy = Whitespace(line)
322            if not indents[-1].equal(thisguy):
323                witness = indents[-1].not_equal_witness(thisguy)
324                msg = "indent not equal e.g. " + format_witnesses(witness)
325                raise NannyNag(start[0], msg, line)
326
327
328if __name__ == '__main__':
329    main()
330