#! /usr/bin/env python

# Fix Python source files to use the new equality test operator, i.e.,
#       if x = y: ...
# is changed to
#       if x == y: ...
# The script correctly tokenizes the Python program to reliably
# distinguish between assignments and equality tests.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments).  Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
# It complains about binaries (files containing null bytes)
# and about files that are ostensibly not Python files: if the first
# line starts with '#!' and does not contain the string 'python'.
#
# Changes made are reported to stdout in a diff-like format.
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions.  Also note several subtleties like
# preserving the file's mode and not even writing a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixline() you can turn this
# into a program for a different change to Python programs...
import sys
import re
import os
from stat import *
import string  # kept for backward compatibility; no longer used below


def err(text):
    """Write *text* to standard error (looked up at call time)."""
    sys.stderr.write(text)


# Debug output goes to the same place as error output.
dbg = err


def rep(text):
    """Write *text* to standard output (looked up at call time)."""
    sys.stdout.write(text)


def main():
    """Process every command line argument and exit.

    Exit status is 2 when no arguments are given, 1 when any file or
    directory could not be processed, and 0 otherwise.
    """
    bad = 0
    if not sys.argv[1:]:  # No arguments
        err('usage: ' + sys.argv[0] + ' file-or-directory ...\n')
        sys.exit(2)
    for arg in sys.argv[1:]:
        if os.path.isdir(arg):
            if recursedown(arg):
                bad = 1
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            bad = 1
        else:
            if fix(arg):
                bad = 1
    sys.exit(bad)


ispythonprog = re.compile(r'^[a-zA-Z0-9_]+\.py$')


def ispython(name):
    """Return True if *name* looks like the file name of a Python module."""
    # re returns a match object or None; it must not be compared to 0.
    return ispythonprog.match(name) is not None


def recursedown(dirname):
    """Fix all Python-looking files below *dirname*.

    Files in a directory are handled (in sorted order) before its
    subdirectories.  Symbolic links are skipped.  Returns 1 if anything
    went wrong, otherwise 0.
    """
    dbg('recursedown(%r)\n' % (dirname,))
    bad = 0
    try:
        names = os.listdir(dirname)
    except OSError as msg:
        err('%s: cannot list directory: %r\n' % (dirname, msg))
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir):
            continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname):
            pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif ispython(name):
            if fix(fullname):
                bad = 1
    for fullname in subdirs:
        if recursedown(fullname):
            bad = 1
    return bad


def _discard(path):
    """Remove *path*, ignoring failure (best-effort cleanup only)."""
    try:
        os.unlink(path)
    except OSError:
        pass


def fix(filename):
    """Apply fixline() to every logical line of *filename*.

    The file is first only scanned.  As soon as a line needs changing,
    the scan restarts from the top, this time copying everything to a
    temp file ('@' + basename, in the same directory), so no temp file
    is written for files that need no change.  Afterwards the original
    is renamed to filename + '~' and the temp file takes its place.
    Each change is reported on stdout.

    Returns 0 on success (including "nothing to change") and 1 on failure.
    """
    try:
        f = open(filename, 'r')
    except (IOError, OSError) as msg:
        err('%s: cannot open: %r\n' % (filename, msg))
        return 1
    head, tail = os.path.split(filename)
    tempname = os.path.join(head, '@' + tail)
    g = None
    finished = False
    try:
        # If we find a match, we rewind the file and start over but
        # now copy everything to a temp file.
        lineno = 0
        while 1:
            line = f.readline()
            if not line:
                break
            lineno = lineno + 1
            if g is None and '\0' in line:
                # Check for binary files
                err(filename + ': contains null bytes; not fixed\n')
                return 1
            if lineno == 1 and g is None and line[:2] == '#!':
                # Check for non-Python scripts.  The whole line is
                # searched so that '#! /usr/bin/env python' is accepted.
                words = line[2:].split()
                if words and re.search('[pP]ython', line) is None:
                    msg = filename + ': ' + words[0]
                    msg = msg + ' script; not fixed\n'
                    err(msg)
                    return 1
            # Join backslash-continued physical lines into one logical line
            while line[-2:] == '\\\n':
                nextline = f.readline()
                if not nextline:
                    break
                line = line + nextline
                lineno = lineno + 1
            newline = fixline(line)
            if newline != line:
                if g is None:
                    try:
                        g = open(tempname, 'w')
                    except (IOError, OSError) as msg:
                        err('%s: cannot create: %r\n' % (tempname, msg))
                        return 1
                    f.seek(0)
                    lineno = 0
                    rep(filename + ':\n')
                    continue  # restart from the beginning
                rep(repr(lineno) + '\n')
                rep('< ' + line)
                rep('> ' + newline)
            if g is not None:
                g.write(newline)
        finished = True
    except UnicodeDecodeError as msg:
        # Binary data that cannot even be decoded as text
        err('%s: cannot decode: %r; not fixed\n' % (filename, msg))
        return 1
    finally:
        f.close()
        if g is not None:
            # Flush the temp file before it is renamed into place
            g.close()
            if not finished:
                _discard(tempname)

    if g is None:
        return 0  # No changes

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 0o7777)
    except OSError as msg:
        err('%s: warning: chmod failed (%r)\n' % (tempname, msg))
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except OSError as msg:
        err('%s: warning: backup failed (%r)\n' % (filename, msg))
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except OSError as msg:
        err('%s: rename failed (%r)\n' % (filename, msg))
        return 1
    # Return success
    return 0


# The tokenize module no longer exports a 'tokenprog' object with the
# old regex-module interface, so a small tokenizer is defined here.
# It only needs to tell apart comments, strings, words, operators that
# contain '=', newlines and other single characters.

# Blanks (any whitespace except newline) and backslash-newline pairs
_IGNORE_RE = re.compile(r'(?:[^\S\n]|\\\r?\n)*')

_TOKEN_RE = re.compile(
    '|'.join([
        r'#[^\n]*',                     # comment up to end of line
        r"(?:[rRuUbBfF]{1,2})?(?:"      # string literal, optional prefix
        r"'''(?:[^\\]|\\.)*?'''"
        r'|"""(?:[^\\]|\\.)*?"""'
        r"|'(?:[^\n'\\]|\\.)*'"
        r'|"(?:[^\n"\\]|\\.)*"'
        r")",
        r'\w+',                         # names, keywords, digits
        r'(?:\*\*|//|<<|>>)=?',         # **, //, <<, >> and their '=' forms
        r'[-+*/%&|^@=!<>:]=',           # ==, !=, <=, >=, :=, +=, ...
        r'<>|->',
        r'\n',
        r'''[^\s'"]''',                 # any other single character
    ]),
    re.S)

# Maps a token that opens a context in which '=' means comparison to
# the token that closes that context.
match = {'if': ':', 'elif': ':', 'while': ':', 'return': '\n',
         '(': ')', '[': ']', '{': '}', '`': '`'}


def fixline(line):
    """Return *line* with '=' replaced by '==' where it is a comparison.

    A '=' is treated as a comparison when it occurs inside a context
    opened by one of the keys of the 'match' table that has not been
    closed yet (e.g. between 'if' and ':', or inside brackets).  Note
    that this also covers '=' inside parentheses, so keyword arguments
    would be rewritten too; the tool is meant for very old sources.

    *line* is one logical line (continuation lines already joined).
    A line with a token that cannot be recognized (e.g. an unterminated
    string) is reported on stdout and returned with only the changes
    made up to that point.  A '==' outside any such context triggers a
    warning on stdout.
    """
    # Quick check for easy case
    if '=' not in line:
        return line

    i, n = 0, len(line)
    stack = []
    while i < n:
        i = _IGNORE_RE.match(line, i).end()
        if i >= n:
            break  # only blanks were left
        found = _TOKEN_RE.match(line, i)
        if found is None:
            # A bad token; forget about the rest of this line
            rep('(Syntax error:)\n')
            rep(line)
            return line
        a, b = found.span()  # Location of the token proper
        token = line[a:b]
        i = b
        if stack and token == stack[-1]:
            del stack[-1]
        elif token in match:
            stack.append(match[token])
        elif token == '=' and stack:
            line = line[:a] + '==' + line[b:]
            # Continue after the inserted operator; the line grew by one
            i, n = a + len('=='), len(line)
        elif token == '==' and not stack:
            rep('(Warning: \'==\' at top level:)\n')
            rep(line)
    return line


if __name__ == "__main__":
    main()