#! /usr/bin/env python3

# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
# programs.  When called as "pindent -c", it takes a valid Python
# program as input and outputs a version augmented with block-closing
# comments.  When called as "pindent -d", it assumes its input is a
# Python program with block-closing comments and outputs a commentless
# version.   When called as "pindent -r" it assumes its input is a
# Python program with block-closing comments but with its indentation
# messed up, and outputs a properly indented version.

# A "block-closing comment" is a comment of the form '# end <keyword>'
# where <keyword> is the keyword that opened the block.  If the
# opening keyword is 'def' or 'class', the function or class name may
# be repeated in the block-closing comment as well.  Here is an
# example of a program fully augmented with block-closing comments:

# def foobar(a, b):
#    if a == b:
#        a = a+1
#    elif a < b:
#        b = b-1
#        if b > a: a = a-1
#        # end if
#    else:
#        print('oops!')
#    # end if
# # end def foobar

# Note that only the last part of an if...elif...else... block needs a
# block-closing comment; the same is true for other compound
# statements (e.g. try...except).  Also note that "short-form" blocks
# like the second 'if' in the example must be closed as well;
# otherwise the 'else' in the example would be ambiguous (remember
# that indentation is not significant when interpreting block-closing
# comments).

# The operations are idempotent (i.e. applied to their own output
# they yield an identical result).  Running first "pindent -c" and
# then "pindent -r" on a valid Python program produces a program that
# is semantically identical to the input (though its indentation may
# be different).  Running "pindent -d" on that output produces a
# program that only differs from the original in indentation.
#
# Other options:
# -s stepsize: set the indentation step size (default 8)
# -t tabsize : set the number of spaces a tab character is worth (default 8)
# -e         : expand TABs into spaces
# file ...   : input file(s) (default standard input)
# Named files are rewritten in place (the original is kept as file~);
# without files, or with a single "-", the result goes to standard output.

# Caveats:
# - comments ending in a backslash will be mistaken for continued lines
# - continuations using backslash are always left unchanged
# - continuations inside parentheses are not extra indented by -r
#   but must be indented for -c to work correctly (this breaks
#   idempotency!)
# - continued lines inside triple-quoted strings are totally garbled

# Secret feature:
# - On input, a block may also be closed with an "end statement" --
#   this is a block-closing comment without the '#' sign.

# Possible improvements:
# - check syntax based on transitions in 'next' table
# - better error reporting
# - better error recovery
# - check identifier after class/def

# The following wishes need a more complete tokenization of the source:
# - Don't get fooled by comments ending in backslash
# - reindent continuation lines indicated by backslash
# - handle continuation lines inside parentheses/braces/brackets
# - handle triple quoted strings spanning lines
# - realign comments
# - optionally do much more thorough reformatting, a la C indent

import getopt
import io
import os
import re
import sys

# Defaults
STEPSIZE = 8
TABSIZE = 8
EXPANDTABS = False

# Transition table: maps a block keyword to the keywords that may follow
# it at the same block level.  Only the keys are consulted at present
# (see "Possible improvements" above).  The names 'next' and 'start' are
# kept for backward compatibility even though 'next' shadows the builtin.
next = {}
next['if'] = next['elif'] = 'elif', 'else', 'end'
next['while'] = next['for'] = 'else', 'end'
next['try'] = 'except', 'finally'
next['except'] = 'except', 'else', 'finally', 'end'
next['else'] = next['finally'] = next['with'] = \
    next['def'] = next['class'] = 'end'
next['end'] = ()
# Keywords that open a new block.
start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'

class PythonIndenter:
    """Line-oriented filter that adds, removes or applies block-closing comments.

    Lines are read from *fpi* and the result is written to *fpo*.  Each of
    complete(), delete() and reformat() consumes the input until EOF.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """Set up the streams, the indentation parameters and the line matchers.

        fpi        -- input file object (needs readline())
        fpo        -- output file object (needs write())
        indentsize -- columns per nesting level, used by reformat()
        tabsize    -- columns a TAB character is worth
        expandtabs -- if true, TABs are expanded to spaces on output
        """
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # A lowercase word at the start of a (possibly continued) line,
        # optionally followed by an identifier.
        self.kwprog = re.compile(
                r'^(?:\s|\\\n)*(?P<kw>[a-z]+)'
                r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # A block-closing comment ('# end kw [id]'); the '#' is optional.
        self.endprog = re.compile(
                r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading blanks and TABs of a line.
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write *line* to the output, expanding TABs when so configured."""
        if self.expandtabs:
            self._write(line.expandtabs(self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Return the next physical input line ('' at EOF), counting lines read."""
        line = self.fpi.readline()
        if line: self.lineno += 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report an error on stderr and as a '### ... ###' line in the output."""
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Return the next logical line: a line plus its backslash continuations."""
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line += line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent):
        """Write *line* re-indented to nesting depth *indent*.

        The existing leading blanks/TABs are dropped and replaced by as many
        TABs as fit in the target width, followed by spaces.  Lines that are
        empty after stripping are written without any indentation.
        """
        width = indent*self.indentsize
        if self.tabsize > 0:
            tabs, spaces = divmod(width, self.tabsize)
        else:
            # A TAB worth nothing cannot express indentation (and would
            # make divmod() raise ZeroDivisionError): use spaces only.
            tabs, spaces = 0, width
        # end if
        i = self.wsprog.match(line).end()
        line = line[i:]
        if line[:1] not in ('\n', '\r', ''):
            line = '\t'*tabs + ' '*spaces + line
        # end if
        self.write(line)
    # end def putline

    def reformat(self):
        """Re-indent the input, using its block-closing comments for nesting."""
        stack = []      # one (opening keyword, latest keyword) pair per open block
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack.pop()[0] != kw2:
                    self.error('unmatched end')
                # end if
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                if kw in next and stack:
                    # Continuation keyword (else, except, ...): it lines up
                    # with the statement that opened the current block.
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output, dropping block-closing comments.

        A warning is written to stderr when the number of dropped comments
        differs from the number of block-opening keywords seen.
        """
        begin_counter = 0
        end_counter = 0
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter += 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter += 1
                # end if
            # end if
            self.write(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def _write_end(self, ws, kw, ident):
        """Write the block-closing comment for *kw* (and *ident*, if any) after *ws*."""
        if ident:
            s = '# end %s %s\n' % (kw, ident)
        else:
            s = '# end %s\n' % kw
        # end if
        self.write(ws + s)
    # end def _write_end

    def complete(self):
        """Copy the input to the output, adding missing block-closing comments.

        Nesting is derived from the indentation of the input.  Blank and
        comment-only lines are held back in 'todo' so that a closing comment
        is written before them rather than after.
        """
        stack = []      # saved (currentws, firstkw, lastkw, topid) of outer levels
        todo = []       # blank/comment lines not yet written
        currentws = thisid = firstkw = lastkw = topid = ''
        while True:
            line = self.getline()
            i = self.wsprog.match(line).end()
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    if thiskw not in next:
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    todo.append(line)
                    continue
                else:
                    # Also reached at EOF (line == ''), which closes all
                    # blocks that are still open.
                    thiskw = ''
                # end if
            # end if
            indentws = line[:i]
            indent = len(indentws.expandtabs(self.tabsize))
            current = len(currentws.expandtabs(self.tabsize))
            while indent < current:
                # Dedent: close the block open at this level, then return
                # to the enclosing level.
                if firstkw:
                    self._write_end(currentws, firstkw, topid)
                    firstkw = lastkw = ''
                # end if
                currentws, firstkw, lastkw, topid = stack.pop()
                current = len(currentws.expandtabs(self.tabsize))
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    # The input already closes the block itself.
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    self._write_end(currentws, firstkw, topid)
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                stack.append((currentws, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                currentws, firstkw, lastkw, topid = \
                          indentws, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete
# end class PythonIndenter

# Simplified user interface
# - xxx_filter(input, output): read and write file objects
# - xxx_string(s): take and return string object
# - xxx_file(filename): process file in place, return true iff changed

def _run_filter(action, input, output, stepsize, tabsize, expandtabs):
    """Run the PythonIndenter method named *action* from *input* to *output*."""
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    getattr(pi, action)()
# end def _run_filter

def _transform_string(action, source, stepsize, tabsize, expandtabs):
    """Return *source* processed by the PythonIndenter method named *action*."""
    output = io.StringIO()
    _run_filter(action, io.StringIO(source), output,
                stepsize, tabsize, expandtabs)
    return output.getvalue()
# end def _transform_string

def _rewrite_file(transform, filename, stepsize, tabsize, expandtabs):
    """Apply the string function *transform* to a file in place.

    Returns 0 and leaves the file alone when nothing changes; otherwise
    makes a backup, rewrites the file and returns 1.
    """
    with open(filename, 'r') as f:
        source = f.read()
    # end with
    result = transform(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    make_backup(filename)
    with open(filename, 'w') as f:
        f.write(result)
    # end with
    return 1
# end def _rewrite_file

def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Copy *input* to *output*, adding block-closing comments."""
    _run_filter('complete', input, output, stepsize, tabsize, expandtabs)
# end def complete_filter

def delete_filter(input= sys.stdin, output = sys.stdout,
                        stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Copy *input* to *output*, removing block-closing comments."""
    _run_filter('delete', input, output, stepsize, tabsize, expandtabs)
# end def delete_filter

def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Copy *input* to *output*, re-indented from its block-closing comments."""
    _run_filter('reformat', input, output, stepsize, tabsize, expandtabs)
# end def reformat_filter

def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return *source* with block-closing comments added."""
    return _transform_string('complete', source, stepsize, tabsize, expandtabs)
# end def complete_string

def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return *source* with block-closing comments removed."""
    return _transform_string('delete', source, stepsize, tabsize, expandtabs)
# end def delete_string

def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return *source* re-indented from its block-closing comments."""
    return _transform_string('reformat', source, stepsize, tabsize, expandtabs)
# end def reformat_string

def make_backup(filename):
    """Rename *filename* to *filename*~, replacing an older backup.

    This is best effort: failures are reported on stderr and not raised.
    """
    backup = filename + '~'
    if os.path.lexists(backup):
        try:
            os.remove(backup)
        except OSError:
            print("Can't remove backup %r" % (backup,), file=sys.stderr)
        # end try
    # end if
    try:
        os.rename(filename, backup)
    except OSError:
        print("Can't rename %r to %r" % (filename, backup), file=sys.stderr)
    # end try
# end def make_backup

def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add block-closing comments to a file in place; return 1 iff it changed."""
    return _rewrite_file(complete_string, filename, stepsize, tabsize, expandtabs)
# end def complete_file

def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove block-closing comments from a file in place; return 1 iff it changed."""
    return _rewrite_file(delete_string, filename, stepsize, tabsize, expandtabs)
# end def delete_file

def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent a file in place; return 1 iff it changed."""
    return _rewrite_file(reformat_string, filename, stepsize, tabsize, expandtabs)
# end def reformat_file

# Test program when called as a script

usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c         : complete a correctly indented program (add #end directives)
-d         : delete #end directives
-r         : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e         : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % {'STEPSIZE': STEPSIZE, 'TABSIZE': TABSIZE}

def error_both(op1, op2):
    """Complain that option *op1* conflicts with the chosen action *op2*; exit 2."""
    sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
    sys.stderr.write(usage)
    sys.exit(2)
# end def error_both

def _int_option(option, value):
    """Return *value* as an int, or exit with a usage error if it is not one."""
    try:
        return int(value)
    except ValueError:
        sys.stderr.write('Error: option %s requires an integer argument, not %r\n'
                         % (option, value))
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
# end def _int_option

def main(argv = None):
    """Command-line entry point.

    argv -- list of arguments without the program name
            (default: sys.argv[1:])

    Exits with status 2 on any usage error.
    """
    if argv is None:
        argv = sys.argv[1:]
    # end if
    try:
        opts, args = getopt.getopt(argv, 'cdrs:t:e')
    except getopt.error as msg:
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    # Explicit dispatch tables instead of eval() on a constructed name.
    actions = {'-c': 'complete', '-d': 'delete', '-r': 'reformat'}
    filters = {'complete': complete_filter,
               'delete': delete_filter,
               'reformat': reformat_filter}
    file_actions = {'complete': complete_file,
                    'delete': delete_file,
                    'reformat': reformat_file}
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if o in actions:
            if action: error_both(o, action)
            # end if
            action = actions[o]
        elif o == '-s':
            stepsize = _int_option(o, a)
        elif o == '-t':
            tabsize = _int_option(o, a)
        elif o == '-e':
            expandtabs = True
        # end if
    # end for
    if not action:
        sys.stderr.write(
                'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    if not args or args == ['-']:
        filters[action](sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        process = file_actions[action]
        for filename in args:
            process(filename, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def main

# The entry point has historically been called test().  Keep that name as
# an alias, and mark the function as "not a test" so that pytest-style
# collectors do not run the command line when this module is star-imported.
main.__test__ = False
test = main

if __name__ == '__main__':
    main()
# end if