1import token
2import tokenize
3from typing import List, Iterator
4
# A saved tokenizer position: Tokenizer.mark() returns the current token
# index and Tokenizer.reset() accepts one.  Kept as a plain alias of int
# rather than a distinct NewType.
Mark = int  # NewType('Mark', int)

# Module-level alias of token.EXACT_TOKEN_TYPES (not referenced by the code
# visible in this part of the file).
exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore
8
9
def shorttok(tok: tokenize.TokenInfo) -> str:
    """Return a fixed-width (25 character) one-line summary of *tok*.

    The summary has the form ``row.col: TYPE_NAME:'string'`` and is padded
    with spaces on the right, or truncated, to exactly 25 characters.
    """
    start = tok.start
    position = f"{start[0]}.{start[1]}"
    kind = token.tok_name[tok.type]
    summary = f"{position}: {kind}:{tok.string!r}"
    # Width 25 pads short summaries; precision .25 truncates long ones.
    return "%-25.25s" % summary
12
13
class Tokenizer:
    """Caching wrapper for the tokenize module.

    Tokens are pulled lazily from the underlying token generator and kept in
    a list, so the read position can be saved with mark() and moved back (or
    forward again) with reset() without re-tokenizing.  NL and COMMENT
    tokens, and ERRORTOKENs consisting only of whitespace, are dropped
    before caching.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
        """Wrap *tokengen*; when *verbose* is true, print a trace line on each move."""
        self._tokengen = tokengen
        self._tokens = []  # every significant token read so far
        self._index = 0  # position of the next token to hand out
        self._verbose = verbose
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index.

        Raises StopIteration if a new token is needed and the underlying
        generator is exhausted.
        """
        # Must be decided before peek(), which may extend the cache.
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index.

        Reads from the underlying generator, skipping insignificant tokens,
        only when the index is at the end of the cache.  Raises
        StopIteration if the generator is exhausted.
        """
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token read so far.

        If nothing has been read yet, one token is consumed via getnext()
        first (which advances the index).
        """
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def mark(self) -> Mark:
        """Return the current position, suitable for passing to reset()."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Move the read position to *index*, a value obtained from mark().

        *index* must lie within the tokens cached so far (asserted).
        """
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print one trace line for the current position.

        The line starts with one dash per token already consumed, followed
        by a marker: "-" after moving backwards, ">" for a token served from
        the cache, "*" for a token freshly read.  Then comes "(Bof)" at
        index 0, or a short description of the most recently consumed token.
        """
        if back:
            marker = "-"
        elif cached:
            marker = ">"
        else:
            marker = "*"
        fill = "-" * self._index + marker
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
87