#!/usr/bin/env python

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

from argparse import ArgumentParser
import sys
import os
import re

# All patterns are raw strings: sequences such as "\s" and "\." are invalid
# escapes in ordinary string literals and trigger warnings on modern Python.

# A jump instruction whose target is a dotted local label ("jne .LBB0_3").
_JUMP_TARGET_RE = re.compile(
    r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
# A local-label declaration at column 0, with or without the leading dot.
_ANY_LABEL_DECL_RE = re.compile(
    r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
# A local-label declaration at column 0 that already has the leading dot.
_DOTTED_LABEL_DECL_RE = re.compile(
    r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
# Splits a line into word-like tokens and the separators between them.
_TOKEN_SPLIT_RE = re.compile(r'([a-zA-Z0-9_]+)')
# A non-dotted label definition at column 0 (treated as a function label).
_FN_LABEL_DEF_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

# TODO: Add more things we want to remove
_DISCARD_REGEXES = (
    re.compile(r"\s+\..*$"),  # directive
    re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
    re.compile(r"\s*#.*$"),  # comment line
    re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
    re.compile(
        r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
)
# Patterns that force a line to be kept even if a discard pattern matched.
# Currently empty; kept as an extension point.
_KEEP_REGEXES = ()


def find_used_labels(asm):
    """
    Return the set of dotted local labels ('.Lxxx') that are the target of
    a jump instruction (any mnemonic starting with 'j') somewhere in asm.
    """
    found = set()
    for line in asm.splitlines():
        m = _JUMP_TARGET_RE.match(line)
        if m:
            found.add('.L%s' % m.group(1))
    return found


def normalize_labels(asm):
    """
    Rewrite local labels spelled 'Lxxx' into the dotted '.Lxxx' form.

    Declarations are the 'Lxxx:' / '.Lxxx:' lines starting at column 0.
    If there are none, or if any declaration already carries the dot, the
    input is returned unchanged. Otherwise every declared label, and every
    whitespace-preceded reference to it, is prefixed with '.'.
    """
    decls = set()
    for line in asm.splitlines():
        m = _ANY_LABEL_DECL_RE.match(line)
        if m:
            decls.add(m.group(0))
    # Decide from the whole set rather than from one arbitrary element, so
    # the result does not depend on set iteration order and already-dotted
    # labels are never given a second dot.
    if not decls or any(decl.startswith('.') for decl in decls):
        return asm
    for decl in decls:
        # '$' in the lookahead also catches a reference that is the very
        # last token of the input (no trailing newline).
        pattern = r"(^|\s+)" + re.escape(decl) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\g<1>." + decl, asm)
    return asm


def transform_labels(asm):
    """
    Normalize local labels and drop the declarations of those that no jump
    instruction targets. Every kept line is terminated with a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    kept = []
    for line in asm.splitlines():
        m = _DOTTED_LABEL_DECL_RE.match(line)
        if m is None or m.group(0) in used_decls:
            kept.append(line)
    return ''.join(line + '\n' for line in kept)


def is_identifier(tk):
    """
    Return True if tk is non-empty, starts with a letter or underscore and
    otherwise consists only of alphanumeric characters and underscores.
    """
    if not tk:
        return False
    first = tk[0]
    if not (first.isalpha() or first == '_'):
        return False
    return all(c.isalnum() or c == '_' for c in tk[1:])


def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    pieces = []
    for tk in _TOKEN_SPLIT_RE.split(l):
        if is_identifier(tk):
            if tk.startswith('__Z'):
                # '__Z...' becomes '_Z...': drop only the extra underscore.
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                # '_name' becomes 'name'; a single-underscore '_Z...' name
                # is left untouched.
                tk = tk[1:]
        pieces.append(tk)
    return ''.join(pieces)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Unused local labels are removed first. Then each line has '@GOTPCREL'
    removed, is dropped if it matches a discard pattern (unless a keep
    pattern matches), and has its identifiers normalized. A blank line is
    inserted before every function label except at the very start of the
    output. Every emitted line is terminated with a newline.
    """
    asm = transform_labels(asm)
    out_lines = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace('@GOTPCREL', '')
        add_line = not any(reg.match(line) is not None
                           for reg in _DISCARD_REGEXES)
        if any(reg.match(line) is not None for reg in _KEEP_REGEXES):
            add_line = True
        if not add_line:
            continue
        # The function-label test runs on the line before identifier
        # normalization.
        if _FN_LABEL_DEF_RE.match(line) and out_lines:
            out_lines.append('')
        out_lines.append(process_identifiers(line))
    return ''.join(line + '\n' for line in out_lines)


def main():
    """
    Command-line entry point: read the input assembly file, strip it with
    process_asm and write the result to the output file. Exits with status
    1 if the input file does not exist. Unrecognized arguments are ignored.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    args, _ = parser.parse_known_args()
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(("ERROR: input file '%s' does not exist") % input_path)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)


if __name__ == '__main__':
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;