1"""Conversion pipeline templates.
2
3The problem:
4------------
5
6Suppose you have some data that you want to convert to another format,
7such as from GIF image format to PPM image format.  Maybe the
8conversion involves several steps (e.g. piping it through compress or
9uuencode).  Some of the conversion steps may require that their input
10is a disk file, others may be able to read standard input; similar for
11their output.  The input to the entire conversion may also be read
12from a disk file or from an open file, and similar for its output.
13
14The module lets you construct a pipeline template by sticking one or
15more conversion steps together.  It will take care of creating and
16removing temporary files if they are necessary to hold intermediate
17data.  You can then use the template to do conversions from many
18different sources to many different destinations.  The temporary
19file names used are different each time the template is used.
20
21The templates are objects so you can create templates for many
22different conversion steps and store them in a dictionary, for
23instance.
24
25
26Directions:
27-----------
28
29To create a template:
30    t = Template()
31
32To add a conversion step to a template:
33   t.append(command, kind)
34where kind is a string of two characters: the first is '-' if the
35command reads its standard input or 'f' if it requires a file; the
36second likewise for the output. The command must be valid /bin/sh
37syntax.  If input or output files are required, they are passed as
38$IN and $OUT; otherwise, it must be  possible to use the command in
39a pipeline.
40
41To add a conversion step at the beginning:
42   t.prepend(command, kind)
43
44To convert a file to another file using a template:
45  sts = t.copy(infile, outfile)
46If infile or outfile are the empty string, standard input is read or
47standard output is written, respectively.  The return value is the
48exit status of the conversion pipeline.
49
50To open a file for reading or writing through a conversion pipeline:
51   fp = t.open(file, mode)
52where mode is 'r' to read the file, or 'w' to write it -- just like
53for the built-in function open() or for os.popen().
54
55To create a new template object initialized to a given one:
56   t2 = t.clone()
57"""                                     # '
58
59
60import re
61import os
62import tempfile
63import string
64
65__all__ = ["Template"]
66
67# Conversion step kinds
68
# Each kind is a two-character code: the first character describes how
# the step takes its input, the second how it delivers its output.

FILEIN_FILEOUT = 'ff'   # reads a real file, writes a real file
STDIN_FILEOUT = '-f'    # reads stdin, must write a real file
FILEIN_STDOUT = 'f-'    # must read a real file, writes stdout
STDIN_STDOUT = '--'     # ordinary pipeline element
SOURCE = '.-'           # must be the first step; writes stdout
SINK = '-.'             # must be the last step; reads stdin

# Every kind accepted by Template.append() / Template.prepend()
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
78
79
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of (cmd, kind) steps in self.steps
    and a debugging flag in self.debugging.  The steps are turned into a
    /bin/sh command string each time the template is used.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so the two templates can be edited independently.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SOURCE, if the template
        already ends with a SINK step, or if cmd lacks the $IN / $OUT
        reference that a file-based kind requires.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # A step that needs real files must mention them in its command.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SINK, if the template
        already begins with a SOURCE step, or if cmd lacks the $IN /
        $OUT reference that a file-based kind requires.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        # A step that needs real files must mention them in its command.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; any other value raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            "Template.open: rw must be 'r' or 'w', not %r" % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the
        pipeline reads from file and the returned pipe yields its
        output.  Raises ValueError if the last step is a SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise data
        written to the returned pipe is fed through the pipeline into
        file.  Raises ValueError if the first step is a SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for it."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        string for this template.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces its execution.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
191
192
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command string that runs steps as a pipeline.

    infile and outfile name the files at the two ends of the pipeline;
    an empty value leaves that end connected to standard input or
    standard output.  steps is a sequence of (cmd, kind) pairs.
    Temporary files are created for neighbouring steps that cannot be
    joined by a pipe, and the returned command removes them again.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', command, kind, ''] for command, kind in steps]

    # An empty template still needs one stage to move the data.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # A first stage that needs an input file, with none given, gets a
    # 'cat' in front of it so it can read from a temporary file.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    # Likewise at the far end for a stage that must write a file.
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Neighbours are joined through a temporary file whenever either
    # side of the junction insists on a real file.
    temp_files = []
    for left, right in zip(stages[:-1], stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            handle, path = tempfile.mkstemp()
            os.close(handle)
            temp_files.append(path)
            left[-1] = path
            right[0] = path

    # Decorate each command with its $IN/$OUT assignments or redirections.
    for stage in stages:
        source, command, kind, target = stage
        if kind[1] == 'f':
            command = ''.join(['OUT=', quote(target), '; ', command])
        if kind[0] == 'f':
            command = ''.join(['IN=', quote(source), '; ', command])
        if kind[0] == '-' and source:
            command = ''.join([command, ' <', quote(source)])
        if kind[1] == '-' and target:
            command = ''.join([command, ' >', quote(target)])
        stage[1] = command

    # Stitch the stages together: a pipe where the stage has no input
    # file, a plain newline (sequential execution) where it has one.
    pieces = [stages[0][1]]
    for stage in stages[1:]:
        command, kind = stage[1:3]
        if stage[0] != '':
            pieces.append('\n' + command)
            continue
        if 'f' in kind:
            # Group so the variable assignment and the command form a
            # single pipeline element.
            command = '{ ' + command + '; }'
        pieces.append(' |\n' + command)
    script = ''.join(pieces)

    # Remove temporary files on normal completion and on the trapped
    # signals.
    if temp_files:
        remove = ' '.join(['rm -f'] + [quote(path) for path in temp_files])
        trap = 'trap ' + quote(remove + '; exit') + ' 1 2 3 13 14 15'
        script = '\n'.join([trap, script, remove])

    return script
260
261
262# Reliably quote a string as a single argument for /bin/sh
263
# Characters that never need quoting for the shell
_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')

def quote(file):
    """Return file escaped so /bin/sh treats it as a single argument.

    A value made up only of safe characters is returned unchanged, an
    empty value becomes '' (two single quotes), and anything else is
    wrapped in single quotes.
    """
    if _safechars.issuperset(file):
        # Nothing to escape; an empty value still needs explicit quotes.
        return file or "''"
    # Single quotes cannot be escaped inside single quotes, so each one
    # closes the quoted run, is emitted inside double quotes, and then
    # reopens it: $'b becomes '$'"'"'b'
    escaped = file.replace("'", "'\"'\"'")
    return "'" + escaped + "'"
279