1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4# Check for stylistic and formal issues in .rst and .py
5# files included in the documentation.
6#
7# 01/2009, Georg Brandl
8
9# TODO: - wrong versions in versionadded/changed
10#       - wrong markup after versionchanged directive
11
12import os
13import re
14import sys
15import getopt
16from os.path import join, splitext, abspath, exists
17from collections import defaultdict
18
# Directive names that ship with docutils.
_docutils_directives = [
    'admonition', 'attention',
    'caution', 'class', 'compound', 'container', 'contents', 'csv-table',
    'danger', 'date', 'default-role',
    'epigraph', 'error',
    'figure', 'footer',
    'header', 'highlights', 'hint',
    'image', 'important', 'include',
    'line-block', 'list-table',
    'meta',
    'note',
    'parsed-literal', 'pull-quote',
    'raw', 'replace', 'restructuredtext-test-directive', 'role', 'rubric',
    'sectnum', 'sidebar',
    'table', 'target-notes', 'tip', 'title', 'topic',
    'unicode',
    'warning',
]

# Directive names added by Sphinx and by the Python documentation itself.
# Entries are spliced verbatim into a regular expression below, which is
# why 'deprecated(?!-removed)' carries a lookahead.
_sphinx_directives = [
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability',
    'centered', 'cfunction', 'class', 'classmethod', 'cmacro', 'cmdoption',
    'cmember', 'code-block', 'confval', 'cssclass', 'ctype', 'currentmodule',
    'cvar',
    'data', 'decorator', 'decoratormethod', 'deprecated-removed',
    'deprecated(?!-removed)', 'describe', 'directive', 'doctest',
    'envvar', 'event', 'exception',
    'function',
    'glossary',
    'highlight', 'highlightlang',
    'impl-detail', 'index',
    'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor',
    'opcode',
    'pdbcommand', 'productionlist', 'program',
    'role',
    'sectionauthor', 'seealso', 'sourcecode', 'staticmethod',
    'tabularcolumns', 'testcode', 'testoutput', 'testsetup', 'toctree',
    'todo', 'todolist',
    'versionadded', 'versionchanged',
]

# Every known directive name: docutils ones first, then the custom ones.
directives = _docutils_directives + _sphinx_directives

# One regex group that alternates over all known directive names.
all_directives = '(%s)' % '|'.join(directives)

# ".. <directive>" that is NOT followed by the "::" of a real directive
# (and is not itself preceded by another dot).
seems_directive_re = re.compile(
    r'(?<!\.)\.\. %s([^a-z:]|:(?!:))' % all_directives
)

# Text in single backticks, delimited by spaces or the line boundaries.
default_role_re = re.compile(
    r'(^| )`\w([^`]*?\w)?`($| )'
)

# Fragments that look like reST markup: "x:: ", a backtick, or ".. word:".
leaked_markup_re = re.compile(
    r'[a-z]::\s|`|\.\.\s*\w+:'
)
48
49
# Registry: file suffix (e.g. '.rst') -> checker functions, in the order
# they were registered.
checkers = {}

# Attributes stamped onto every checker function, with their defaults.
checker_props = {'severity': 1, 'falsepositives': False}


def checker(*suffixes, **kwds):
    """Return a decorator that registers a function as a checker.

    The decorated function is appended to ``checkers`` under each of the
    given *suffixes*.  Each key of ``checker_props`` becomes an attribute
    of the function, taken from *kwds* when given there and from the
    default otherwise.  The function itself is returned unchanged.
    """
    def register(func):
        # Attach the properties, falling back to the defaults.
        for name, default in checker_props.items():
            setattr(func, name, kwds.get(name, default))
        # File the function under every suffix it handles.
        for ext in suffixes:
            if ext not in checkers:
                checkers[ext] = []
            checkers[ext].append(func)
        return func
    return register
64
65
66@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Check Python examples for valid syntax.

    *fn* is the file name reported to the compiler, *lines* the lines of
    the file.  Yields ``(lineno, message)`` pairs; the line number is 0
    when the problem cannot be attributed to a particular line.
    """
    code = ''.join(lines)
    if '\r' in code:
        if os.name != 'nt':
            yield 0, '\\r in code file'
        # compile the code without the carriage returns either way
        code = code.replace('\r', '')
    try:
        compile(code, fn, 'exec')
    except (SyntaxError, ValueError) as err:
        # Source containing null bytes makes compile() raise ValueError on
        # older Pythons and a SyntaxError without line information on
        # newer ones.  Always yield an integer so that callers can format
        # the line number with %d.
        yield getattr(err, 'lineno', None) or 0, 'not compilable: %s' % err
78
79
80@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Look for reST constructs that are probably mistakes.

    Yields ``(lineno, message)`` pairs for lines that match
    ``seems_directive_re`` and for lines that use the default role.  The
    default-role check is switched off from a ``.. productionlist::``
    line up to the next blank line.
    """
    in_production = False
    for number, text in enumerate(lines, 1):
        if seems_directive_re.search(text):
            yield number, 'comment seems to be intended as a directive'

        if '.. productionlist::' in text:
            in_production = True
            continue

        if in_production:
            # a blank line ends the production list
            if not text.strip():
                in_production = False
            continue

        if default_role_re.search(text):
            yield number, 'default role used'
93
94
95@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Check for carriage returns, tabs and trailing whitespace.

    Yields ``(lineno, message)`` pairs; one line can produce several.
    """
    for lno, line in enumerate(lines, 1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Drop the newline only if there is one: the last line of a file
        # may lack it, and must not lose its final character instead.
        content = line[:-1] if line.endswith('\n') else line
        if content.rstrip(' \t') != content:
            yield lno, 'trailing whitespace'
105
106
107@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default.

    Yields ``(lineno, "line too long")`` for every line longer than 81
    characters (counted with its line ending, if any) unless it looks
    like a table row, contains a link, or is a function signature.
    """
    for lno, line in enumerate(lines, 1):
        if len(line) <= 81:
            continue
        stripped = line.lstrip()
        # don't complain about tables, links and function signatures;
        # startswith() also copes with a line that is whitespace only,
        # where indexing the first character would fail
        if stripped.startswith(('+', '|')):
            continue
        if 'http://' in line:
            continue
        if stripped.startswith(('.. function',
                                '.. method',
                                '.. cfunction')):
            continue
        yield lno, "line too long"
119
120
121@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Scan HTML files for reST markup that leaked into the output.

    This is only useful once the HTML files have been built.  Yields
    ``(lineno, message)`` for every line matching ``leaked_markup_re``;
    the message contains the repr of the line.
    """
    lineno = 0
    for text in lines:
        lineno += 1
        if leaked_markup_re.search(text) is not None:
            yield lineno, 'possibly leaked markup: %r' % text
129
130
def main(argv):
    """Run the registered checkers over a directory tree and report.

    *argv* is the complete argument vector, program name included.  The
    files below the given path (default: the current directory) are
    walked; each file whose extension has checkers in ``checkers`` is
    read as UTF-8 and handed to them.  Checkers below the requested
    severity are skipped, as are checkers flagged ``falsepositives``
    unless ``-f`` was given.

    Returns 2 on a usage error or nonexistent path, 1 if any problem was
    counted, and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options:  -v       verbose (print all checked file names)
          -f       enable checkers that yield many false positives
          -s sev   only show problems with severity >= sev
          -i path  ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = []
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            try:
                severity = int(val)
            except ValueError:
                # a non-numeric severity is a usage error, not a crash
                print(usage)
                return 2
        elif opt == '-i':
            ignore.append(abspath(val))

    if len(args) > 1:
        print(usage)
        return 2
    path = args[0] if args else '.'

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # number of problems found, keyed by severity
    count = defaultdict(int)

    for root, dirs, files in os.walk(path):
        # ignore subdirs in ignore list; emptying dirs in place keeps
        # os.walk from descending any further
        if abspath(root) in ignore:
            del dirs[:]
            continue

        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except (IOError, OSError, UnicodeError) as err:
                # UnicodeError: a file that is not valid UTF-8 is counted
                # as a problem instead of aborting the whole run
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
    if verbose:
        print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))
226
227
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv))
230