1"""text_file
2
3provides the TextFile class, which gives an interface to text files
4that (optionally) takes care of stripping comments, ignoring blank
5lines, and joining lines with backslashes."""
6
7import sys, io
8
9
class TextFile:
    """Provides a file-like object that takes care of all the things you
       commonly want to do when processing a text file that has some
       line-by-line syntax: strip comments (as long as "#" is your
       comment character), skip blank lines, join adjacent lines by
       escaping the newline (ie. backslash at end of line), strip
       leading and/or trailing whitespace.  All of these are optional
       and independently controllable.

       Provides a 'warn()' method so you can generate warning messages that
       report physical line number, even if the logical line in question
       spans multiple physical lines.  Also provides 'unreadline()' for
       implementing line-at-a-time lookahead.

       Constructor is called as:

           TextFile (filename=None, file=None, **options)

       It bombs (RuntimeError) if both 'filename' and 'file' are None;
       'filename' should be a string, and 'file' a file object (or
       something that provides 'readline()' and 'close()' methods).  It is
       recommended that you supply at least 'filename', so that TextFile
       can include it in warning messages.  If 'file' is not supplied,
       TextFile creates its own using 'io.open()'.

       The options affect the value returned by 'readline()'; all are
       boolean except 'errors':
         strip_comments [default: true]
           strip from "#" to end-of-line -- unless the "#" is escaped by
           a backslash.  Whitespace leading up to the "#" is removed by
           'rstrip_ws', not by this option.  Only the first "#" on a
           line is examined: if it is escaped, every "\\#" on the line
           is unescaped and no comment is stripped from that line.
         lstrip_ws [default: false]
           strip leading whitespace from each line before returning it
         rstrip_ws [default: true]
           strip trailing whitespace (including line terminator!) from
           each line before returning it
         skip_blanks [default: true]
           skip lines that are empty *after* stripping comments and
           whitespace.  (If both lstrip_ws and rstrip_ws are false,
           then some lines may consist of solely whitespace: these will
           *not* be skipped, even if 'skip_blanks' is true.)
         join_lines [default: false]
           if a backslash is the last non-newline character on a line
           after stripping comments and whitespace, join the following line
           to it to form one "logical line"; if N consecutive lines end
           with a backslash, then N+1 physical lines will be joined to
           form one logical line.
         collapse_join [default: false]
           strip leading whitespace from lines that are joined to their
           predecessor; only matters if (join_lines and not lstrip_ws)
         errors [default: 'strict']
           error handler used to decode the file content

       Note that since 'rstrip_ws' can strip the trailing newline, the
       semantics of 'readline()' must differ from those of the builtin file
       object's 'readline()' method!  In particular, 'readline()' returns
       None for end-of-file: an empty string might just be a blank line (or
       an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
       not."""

    default_options = { 'strip_comments': 1,
                        'skip_blanks':    1,
                        'lstrip_ws':      0,
                        'rstrip_ws':      1,
                        'join_lines':     0,
                        'collapse_join':  0,
                        'errors':         'strict',
                      }

    def __init__(self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
           (a string) and 'file' (a file-like object) must be supplied.
           The keyword argument options are described above and affect
           the values returned by 'readline()'.

           Raises RuntimeError if neither 'filename' nor 'file' is given,
           and KeyError if an unknown option name is passed."""
        if filename is None and file is None:
            raise RuntimeError("you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash before storing anything
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt, default in self.default_options.items():
            setattr(self, opt, options.get(opt, default))

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

    def open(self, filename):
        """Open a new file named 'filename'.  This overrides both the
           'filename' and 'file' arguments to the constructor, and resets
           the current line number to 0.  The file is opened for reading
           with the 'errors' option as its decoding error handler."""
        self.filename = filename
        self.file = io.open(self.filename, 'r', errors=self.errors)
        self.current_line = 0

    def close(self):
        """Close the current file and forget everything we know about it
           (filename, current line number).  Calling 'close()' again on
           an already-closed TextFile is a no-op."""
        fileobj = self.file
        self.file = None
        self.filename = None
        self.current_line = None
        if fileobj is not None:
            fileobj.close()

    def gen_error(self, msg, line=None):
        """Return 'msg' prefixed with the file name and line number(s).

           'line' defaults to the current line; it may be an integer
           (single physical line) or a two-element list or tuple (range
           of physical lines).  The file name prefix is omitted when no
           file name is known (ie. only 'file' was given to the
           constructor)."""
        if line is None:
            line = self.current_line
        outmsg = []
        if self.filename is not None:
            outmsg.append("%s, " % (self.filename,))
        if isinstance(line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple(line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return "".join(outmsg)

    def error(self, msg, line=None):
        """Raise ValueError with 'msg' tied to the current (or given)
           line, formatted as by 'gen_error()'."""
        raise ValueError("error: " + self.gen_error(msg, line))

    def warn(self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
           line in the current file.  If the current logical line in the
           file spans multiple physical lines, the warning refers to the
           whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
           the current line number; it may be a list or tuple to indicate a
           range of physical lines, or an integer for a single physical
           line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")

    def _count_physical_line(self, joining):
        """Account for one more physical line in 'current_line'.

           If 'joining' is true the line belongs to the logical line
           being built up, so 'current_line' becomes (or extends) a
           [first, last] range; otherwise it starts a new logical line
           and 'current_line' becomes a plain integer again."""
        current = self.current_line
        if joining:
            if isinstance(current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        elif isinstance(current, list):
            self.current_line = current[1] + 1
        else:
            self.current_line = current + 1

    def readline(self):
        """Read and return a single logical line from the current file (or
           from an internal buffer if lines have previously been "unread"
           with 'unreadline()').  If the 'join_lines' option is true, this
           may involve reading multiple physical lines concatenated into a
           single string.  Updates the current line number, so calling
           'warn()' after 'readline()' emits a warning about the physical
           line(s) just read.  Returns None on end-of-file, since the empty
           string can occur if 'rstrip_ws' is true but 'skip_blanks' is
           not."""
        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while True:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            # did previous line end with a backslash? then this physical
            # line is a continuation of the logical line in 'buildup_line'
            joining = bool(self.join_lines and buildup_line)

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment and carry on.  Otherwise, it's just
                # an escaped "#", so unescape it (and any other escaped
                # "#"'s that might be lurking in there) and otherwise
                # leave the line alone.

                pos = line.find("#")
                if pos == -1: # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos-1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    eol = '\n' if line.endswith('\n') else ''
                    line = line[0:pos] + eol

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".  The skipped line is still
                    # a physical line, so count it to keep the line
                    # numbers reported by 'warn()' accurate.
                    if line.strip() == "":
                        self._count_physical_line(joining)
                        continue
                else: # it's an escaped "#"
                    line = line.replace("\\#", "#")

            if joining:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip()
                line = buildup_line + line

            # just an ordinary line, read it as usual
            elif line is None: # eof
                return None

            self._count_physical_line(joining)

            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)? skip to next line
            # if appropriate
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # 'endswith' rather than indexing: 'line' may be empty
                # here when 'skip_blanks' is false
                if line.endswith('\\'):
                    buildup_line = line[:-1]
                    continue

                if line.endswith('\\\n'):
                    buildup_line = line[0:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    def readlines(self):
        """Read and return the list of all logical lines remaining in the
           current file."""
        lines = []
        while True:
            line = self.readline()
            if line is None:
                return lines
            lines.append(line)

    def unreadline(self, line):
        """Push 'line' (a string) onto an internal buffer that will be
           checked by future 'readline()' calls.  Handy for implementing
           a parser with line-at-a-time lookahead."""
        self.linebuf.append(line)