"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format.  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
  t = Template()

To add a conversion step to a template:
  t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
  t.prepend(command, kind)

To convert a file to another file using a template:
  sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
  fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
  t2 = t.clone()

For an example, see the function test() at the end of the file.
"""                                     # '


import re
import os
import tempfile
import string

__all__ = ["Template"]

# Conversion step kinds

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT  = '-f'                   # Must write a real file
FILEIN_STDOUT  = 'f-'                   # Must read a real file
STDIN_STDOUT   = '--'                   # Normal pipeline element
SOURCE         = '.-'                   # Must be first, writes stdout
SINK           = '-.'                   # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


def _check_cmd_and_kind(where, cmd, kind):
    """Check that cmd is a string and kind is one of stepkinds.

    where is the name of the public Template method doing the check; it
    is only used to prefix the error message.  Raises TypeError for a
    non-string cmd and ValueError for an unknown kind.
    """
    if not isinstance(cmd, str):
        raise TypeError('Template.%s: cmd must be a string' % where)
    if kind not in stepkinds:
        raise ValueError('Template.%s: bad kind %r' % (where, kind))


def _check_placeholders(where, cmd, kind):
    """Check that cmd mentions $IN / $OUT when kind requires real files.

    Raises ValueError when a file-based end ('f') has no matching
    placeholder in the command string.
    """
    if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
        raise ValueError('Template.%s: missing $IN in cmd' % where)
    if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
        raise ValueError('Template.%s: missing $OUT in cmd' % where)


class Template:
    """Class representing a pipeline template."""

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        # Each step is a (cmd, kind) tuple.
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later append/prepend calls on either template
        # do not affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, is SOURCE, the template already ends with a SINK
        step, or a required $IN / $OUT placeholder is missing from cmd.
        """
        _check_cmd_and_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        _check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, is SINK, the template already begins with a SOURCE
        step, or a required $IN / $OUT placeholder is missing from cmd.
        """
        _check_cmd_and_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        _check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            "Template.open: rw must be 'r' or 'w', not %r" % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise a pipe
        reading the pipeline's standard output is returned.  Raises
        ValueError if the pipeline ends with a SINK step.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise a pipe
        feeding the pipeline's standard input is returned.  Raises
        ValueError if the pipeline begins with a SOURCE step.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline through os.system()
        and returns its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        string for this template.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            # Single-argument call form prints the same text whether
            # print is a statement or a function.
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command string running steps from infile to
    outfile.

    steps is a sequence of (cmd, kind) pairs.  An empty infile/outfile
    leaves the corresponding end of the pipeline unredirected.  Calling
    this function creates (via tempfile.mkstemp) any temporary files
    needed to connect stages that require real files; the returned
    command removes them when it finishes or is interrupted.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a stage that needs a real
    # file but has none gets a 'cat' in front of / behind it.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            (fd, temp) = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Attach variable assignments and redirections to each command
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file is fed through a
    # pipe, otherwise it runs as a separate command on the next line.
    #
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group so the variable assignment and the command form
                # a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files at the end and on the listed signals
    #
    if garbage:
        rmcmd = 'rm -f ' + ' '.join([quote(name) for name in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist


# Reliably quote a string as a single argument for /bin/sh

# Safe unquoted
_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')

def quote(file):
    """Return a shell-escaped version of the file string."""
    if not file:
        return "''"
    if all(c in _safechars for c in file):
        return file
    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    return "'" + file.replace("'", "'\"'\"'") + "'"