"""ARM/NEON assembly emitter.

Used by code generators to produce ARM assembly with NEON simd code.
Provides tools for easier register management: named register variable
allocation/deallocation, and offers a more procedural/structured approach
to generating assembly.

TODO: right now neon emitter prints out assembly instructions immediately,
it might be beneficial to keep the whole structure and emit the assembly after
applying some optimizations like: instruction reordering or register reuse.

TODO: NeonRegister object assigns explicit registers at allocation time.
Similarly to emitting code, register mapping and reuse can be performed and
optimized lazily.
"""


class Error(Exception):
    """Module level error."""


class RegisterAllocationError(Error):
    """Cannot allocate registers."""


class RegisterDeallocationError(Error):
    """Cannot free a register (unknown register kind)."""


class LaneError(Error):
    """Wrong lane number."""


def Low(register):
    """Return the name of the low double register of a quad register.

    Args:
      register: quad register name of the form 'q<N>'.

    Returns:
      The string 'd<2N>'.

    Raises:
      AssertionError: if the name does not start with 'q'.
    """
    assert register[0] == 'q'
    num = int(register[1:])
    return 'd%d' % (num * 2)


def High(register):
    """Return the name of the high double register of a quad register.

    Args:
      register: quad register name of the form 'q<N>'.

    Returns:
      The string 'd<2N+1>'.

    Raises:
      AssertionError: if the name does not start with 'q'.
    """
    assert register[0] == 'q'
    num = int(register[1:])
    return 'd%d' % (num * 2 + 1)


class NeonRegisters(object):
    """Utility that keeps track of used ARM/NEON registers."""

    def __init__(self):
        # Indices of double (d) registers currently in use. A quad register
        # q<N> is tracked as the pair of doubles 2N and 2N + 1.
        self.double = set()
        # Indices of every double register handed out so far; never shrinks.
        self.double_ever = set()
        # Indices of general (r) registers currently in use.
        self.general = set()
        # Indices of every general register handed out so far; never shrinks.
        self.general_ever = set()
        # Names of parameters mapped through MapParameter.
        self.parameters = set()

    def MapParameter(self, parameter):
        """Record a named parameter and return its '%[name]' reference."""
        self.parameters.add(parameter)
        return '%%[%s]' % parameter

    def DoubleRegister(self, min_val=0):
        """Allocate the lowest free double register.

        Args:
          min_val: lowest register index to consider.

        Returns:
          The register name, 'd<N>'.

        Raises:
          RegisterAllocationError: if no index in [min_val, 32) is free.
        """
        for i in range(min_val, 32):
            if i not in self.double:
                self.double.add(i)
                self.double_ever.add(i)
                return 'd%d' % i
        raise RegisterAllocationError('Not enough double registers.')

    def QuadRegister(self, min_val=0):
        """Allocate the lowest quad register whose two doubles are both free.

        Args:
          min_val: lowest quad register index to consider.

        Returns:
          The register name, 'q<N>'.

        Raises:
          RegisterAllocationError: if no index in [min_val, 16) is free.
        """
        for i in range(min_val, 16):
            low = i * 2
            high = low + 1
            if low not in self.double and high not in self.double:
                self.double.update((low, high))
                self.double_ever.update((low, high))
                return 'q%d' % i
        raise RegisterAllocationError('Not enough quad registers.')

    def GeneralRegister(self):
        """Allocate the lowest free general register.

        Returns:
          The register name, 'r<N>'.

        Raises:
          RegisterAllocationError: if all indices in [0, 16) are in use.
        """
        for i in range(0, 16):
            if i not in self.general:
                self.general.add(i)
                self.general_ever.add(i)
                return 'r%d' % i
        raise RegisterAllocationError('Not enough general registers.')

    def MappedParameters(self):
        """Return a list of all parameter names passed to MapParameter."""
        return list(self.parameters)

    def Clobbers(self):
        """Return names of every register ever allocated.

        General registers come first, then double registers; each group is
        sorted by register index so the result is deterministic.
        """
        general = ['r%d' % i for i in sorted(self.general_ever)]
        double = ['d%d' % i for i in self.DoubleClobbers()]
        return general + double

    def DoubleClobbers(self):
        """Return the sorted indices of every double register ever used."""
        return sorted(self.double_ever)

    def Low(self, register):
        """Return the low double register name of a quad register."""
        return Low(register)

    def High(self, register):
        """Return the high double register name of a quad register."""
        return High(register)

    def FreeRegister(self, register):
        """Mark a previously allocated register as free again.

        Args:
          register: register name: 'r<N>', 'd<N>' or 'q<N>'.

        Raises:
          AssertionError: if the name is too short or the register is not
            currently allocated.
          RegisterDeallocationError: if the name does not start with 'r', 'd'
            or 'q'.
        """
        assert len(register) > 1
        num = int(register[1:])

        kind = register[0]
        if kind == 'r':
            assert num in self.general
            self.general.remove(num)
        elif kind == 'd':
            assert num in self.double
            self.double.remove(num)
        elif kind == 'q':
            # A quad register occupies two consecutive double registers.
            assert num * 2 in self.double
            assert num * 2 + 1 in self.double
            self.double.remove(num * 2)
            self.double.remove(num * 2 + 1)
        else:
            raise RegisterDeallocationError(
                'Register not allocated: %s' % register)


class NeonEmitter(object):
    """Emits ARM/NEON assembly opcodes.

    All output is printed to standard output as it is emitted. Every opcode
    emitted through EmitOp1/EmitOp2/EmitOp3 is also counted in `self.ops`.
    """

    # One level of indentation. PushIndent and PopIndent both use this so
    # that they always add and remove the same number of characters.
    _INDENT = '  '

    def __init__(self, debug=False):
        # Opcode name -> number of times it was emitted.
        self.ops = {}
        # Current indentation prefix for EmitIndented.
        self.indent = ''
        # When true, EmitAssert emits assert() calls.
        self.debug = debug

    def PushIndent(self):
        """Increase the indentation by one level."""
        self.indent += self._INDENT

    def PopIndent(self):
        """Decrease the indentation by one level."""
        self.indent = self.indent[:-len(self._INDENT)]

    def EmitIndented(self, what):
        """Print `what` prefixed with the current indentation."""
        print(self.indent + what)

    def PushOp(self, op):
        """Increment the emitted-instruction counter for `op`."""
        self.ops[op] = self.ops.get(op, 0) + 1

    def ClearCounters(self):
        """Reset all emitted-instruction counters."""
        self.ops.clear()

    def EmitNewline(self):
        """Print an empty line."""
        print('')

    def EmitPreprocessor1(self, op, param):
        """Print an unindented '#<op> <param>' preprocessor directive."""
        print('#%s %s' % (op, param))

    def EmitPreprocessor(self, op):
        """Print an unindented '#<op>' preprocessor directive."""
        print('#%s' % op)

    def EmitInclude(self, include):
        """Print '#include <include>'; `include` is used verbatim."""
        self.EmitPreprocessor1('include', include)

    def EmitCall1(self, function, param):
        """Emit the statement '<function>(<param>);'."""
        self.EmitIndented('%s(%s);' % (function, param))

    def EmitAssert(self, assert_expression):
        """Emit 'assert(<expression>);' when the emitter is in debug mode."""
        if self.debug:
            self.EmitCall1('assert', assert_expression)

    def EmitHeaderBegin(self, header_name, includes):
        """Emit the include guard opening and the list of includes.

        Args:
          header_name: guard base name; '_H_' is appended and the result is
            upper-cased.
          includes: iterable of include targets (may be empty or None).
        """
        guard = (header_name + '_H_').upper()
        self.EmitPreprocessor1('ifndef', guard)
        self.EmitPreprocessor1('define', guard)
        self.EmitNewline()
        if includes:
            for include in includes:
                self.EmitInclude(include)
            self.EmitNewline()

    def EmitHeaderEnd(self):
        """Emit the '#endif' closing the include guard."""
        self.EmitPreprocessor('endif')

    def EmitCode(self, code):
        """Emit `code` followed by a semicolon."""
        self.EmitIndented('%s;' % code)

    def EmitFunctionBeginA(self, function_name, params, return_type):
        """Emit a function header and increase the indentation.

        Args:
          function_name: name of the function.
          params: iterable of (type, name) pairs.
          return_type: return type of the function.
        """
        signature = ', '.join('%s %s' % (t, n) for (t, n) in params)
        self.EmitIndented(
            '%s %s(%s) {' % (return_type, function_name, signature))
        self.PushIndent()

    def EmitFunctionEnd(self):
        """Decrease the indentation and emit the closing brace."""
        self.PopIndent()
        self.EmitIndented('}')

    def EmitAsmBegin(self):
        """Emit 'asm volatile(' and increase the indentation."""
        self.EmitIndented('asm volatile(')
        self.PushIndent()

    def EmitAsmMapping(self, elements, modifier):
        """Emit one operand section of an asm statement.

        Each element is emitted as '[name] "<modifier>"(name)'; an empty
        section is emitted as a lone ':'.
        """
        if elements:
            operands = ['[%s] "%s"(%s)' % (d, modifier, d) for d in elements]
            self.EmitIndented(': ' + ', '.join(operands))
        else:
            self.EmitIndented(':')

    def EmitClobbers(self, elements):
        """Emit the clobber section: quoted names, or a lone ':' if empty."""
        if elements:
            self.EmitIndented(': ' + ', '.join('"%s"' % c for c in elements))
        else:
            self.EmitIndented(':')

    def EmitAsmEnd(self, outputs, inputs, clobbers):
        """Emit the operand and clobber sections and close the asm statement.

        Args:
          outputs: names emitted with the "+r" constraint.
          inputs: names emitted with the "r" constraint.
          clobbers: clobbered register names.
        """
        self.EmitAsmMapping(outputs, '+r')
        self.EmitAsmMapping(inputs, 'r')
        self.EmitClobbers(clobbers)
        self.PopIndent()
        self.EmitIndented(');')

    def EmitComment(self, comment):
        """Emit a '// <comment>' line."""
        self.EmitIndented('// ' + comment)

    def EmitNumericalLabel(self, label):
        """Emit the quoted numerical label '"<label>:"'."""
        self.EmitIndented('"%d:"' % label)

    def EmitOp1(self, op, param1):
        """Count and emit a one-operand instruction as a quoted asm line."""
        self.PushOp(op)
        self.EmitIndented('"%s %s\\n"' % (op, param1))

    def EmitOp2(self, op, param1, param2):
        """Count and emit a two-operand instruction as a quoted asm line."""
        self.PushOp(op)
        self.EmitIndented('"%s %s, %s\\n"' % (op, param1, param2))

    def EmitOp3(self, op, param1, param2, param3):
        """Count and emit a three-operand instruction as a quoted asm line."""
        self.PushOp(op)
        self.EmitIndented(
            '"%s %s, %s, %s\\n"' % (op, param1, param2, param3))

    def EmitZip(self, size, param1, param2):
        """Emit 'vzip.<size> param1, param2'."""
        self.EmitOp2('vzip.%d' % size, param1, param2)

    def EmitZip8(self, param1, param2):
        """Emit 'vzip.8 param1, param2'."""
        self.EmitZip(8, param1, param2)

    def EmitZip16(self, param1, param2):
        """Emit 'vzip.16 param1, param2'."""
        self.EmitZip(16, param1, param2)

    def EmitZip32(self, param1, param2):
        """Emit 'vzip.32 param1, param2'."""
        self.EmitZip(32, param1, param2)

    def EmitAdd(self, destination, source, param):
        """Emit 'add destination, source, param'."""
        self.EmitOp3('add', destination, source, param)

    def EmitSubs(self, destination, source, param):
        """Emit 'subs destination, source, param'."""
        self.EmitOp3('subs', destination, source, param)

    def EmitSub(self, destination, source, param):
        """Emit 'sub destination, source, param'."""
        self.EmitOp3('sub', destination, source, param)

    def EmitMul(self, destination, source, param):
        """Emit 'mul destination, source, param'."""
        self.EmitOp3('mul', destination, source, param)

    def EmitMov(self, param1, param2):
        """Emit 'mov param1, param2'."""
        self.EmitOp2('mov', param1, param2)

    def EmitSkip(self, register, skip, stride):
        """Emit an 'add' advancing `register` by the constant skip * stride."""
        self.EmitOp3('add', register, register, '#%d' % (skip * stride))

    def EmitBeqBack(self, label):
        """Emit 'beq <label>b' (branch to numerical label, backward)."""
        self.EmitOp1('beq', '%db' % label)

    def EmitBeqFront(self, label):
        """Emit 'beq <label>f' (branch to numerical label, forward)."""
        self.EmitOp1('beq', '%df' % label)

    def EmitBneBack(self, label):
        """Emit 'bne <label>b' (branch to numerical label, backward)."""
        self.EmitOp1('bne', '%db' % label)

    def EmitBneFront(self, label):
        """Emit 'bne <label>f' (branch to numerical label, forward)."""
        self.EmitOp1('bne', '%df' % label)

    def EmitVAdd(self, add_type, destination, source_1, source_2):
        """Emit 'vadd.<add_type> destination, source_1, source_2'."""
        self.EmitOp3('vadd.%s' % add_type, destination, source_1, source_2)

    def EmitVAddw(self, add_type, destination, source_1, source_2):
        """Emit 'vaddw.<add_type> destination, source_1, source_2'."""
        self.EmitOp3('vaddw.%s' % add_type, destination, source_1, source_2)

    def EmitVCvt(self, cvt_to, cvt_from, destination, source):
        """Emit 'vcvt.<cvt_to>.<cvt_from> destination, source'."""
        self.EmitOp2(
            'vcvt.%s.%s' % (cvt_to, cvt_from), destination, source)

    def EmitVDup(self, dup_type, destination, source):
        """Emit 'vdup.<dup_type> destination, source'."""
        self.EmitOp2('vdup.%s' % dup_type, destination, source)

    def EmitVMov(self, mov_type, destination, source):
        """Emit 'vmov.<mov_type> destination, source'."""
        self.EmitOp2('vmov.%s' % mov_type, destination, source)

    def EmitVQmovn(self, mov_type, destination, source):
        """Emit 'vqmovn.<mov_type> destination, source'."""
        self.EmitOp2('vqmovn.%s' % mov_type, destination, source)

    def EmitVQmovun(self, mov_type, destination, source):
        """Emit 'vqmovun.<mov_type> destination, source'."""
        self.EmitOp2('vqmovun.%s' % mov_type, destination, source)

    def EmitVMul(self, mul_type, destination, source_1, source_2):
        """Emit 'vmul.<mul_type> destination, source_1, source_2'."""
        self.EmitOp3('vmul.%s' % mul_type, destination, source_1, source_2)

    def EmitVMull(self, mul_type, destination, source_1, source_2):
        """Emit 'vmull.<mul_type> destination, source_1, source_2'."""
        self.EmitOp3('vmull.%s' % mul_type, destination, source_1, source_2)

    def EmitVPadd(self, add_type, destination, source_1, source_2):
        """Emit 'vpadd.<add_type> destination, source_1, source_2'."""
        self.EmitOp3('vpadd.%s' % add_type, destination, source_1, source_2)

    def EmitVPaddl(self, add_type, destination, source):
        """Emit 'vpaddl.<add_type> destination, source'."""
        self.EmitOp2('vpaddl.%s' % add_type, destination, source)

    def EmitVPadal(self, add_type, destination, source):
        """Emit 'vpadal.<add_type> destination, source'."""
        self.EmitOp2('vpadal.%s' % add_type, destination, source)

    def EmitVLoad(self, load_type, destination, source):
        """Emit 'vld<load_type> {destination}, source'."""
        self.EmitOp2(
            'vld%s' % load_type, '{%s}' % destination, '%s' % source)

    def EmitVLoadA(self, load_type, destinations, source):
        """Emit a vld with a comma-separated list of destination registers."""
        self.EmitVLoad(load_type, ', '.join(destinations), source)

    def EmitPld(self, load_address_register):
        """Emit 'pld [register]'."""
        self.EmitOp1('pld', '[%s]' % load_address_register)

    def EmitPldOffset(self, load_address_register, offset):
        """Emit 'pld [register, offset]'."""
        self.EmitOp1('pld', '[%s, %s]' % (load_address_register, offset))

    def EmitInstructionPreload(self, label):
        """Emit 'pli <label>'."""
        self.EmitOp1('pli', label)

    def EmitVShl(self, shift_type, destination, source, shift):
        """Emit 'vshl.<shift_type> destination, source, shift'."""
        self.EmitOp3('vshl.%s' % shift_type, destination, source, shift)

    def EmitVStore(self, store_type, source, destination):
        """Emit 'vst<store_type> {source}, destination'."""
        self.EmitOp2('vst%s' % store_type, '{%s}' % source, destination)

    def EmitVStoreA(self, store_type, sources, destination):
        """Emit a vst with a comma-separated list of source registers."""
        self.EmitVStore(store_type, ', '.join(sources), destination)

    def EmitVStoreOffset(self, store_type, source, destination, offset):
        """Emit 'vst<store_type> {source}, destination, offset'."""
        self.EmitOp3(
            'vst%s' % store_type, '{%s}' % source, destination, offset)

    def Dereference(self, value, alignment):
        """Return '[value:alignment]', or '[value]' if alignment is falsy."""
        if alignment:
            return '[%s:%d]' % (value, alignment)
        return '[%s]' % value

    def DereferenceIncrement(self, value, alignment):
        """Return the Dereference() operand followed by '!'."""
        return '%s!' % self.Dereference(value, alignment)

    def ImmediateConstant(self, value):
        """Return the immediate operand '#<value>' for an integer."""
        return '#%d' % value

    def AllLanes(self, value):
        """Return the all-lanes operand '<value>[]'."""
        return '%s[]' % value

    def Lane(self, value, lane):
        """Return the single-lane operand '<value>[<lane>]'."""
        return '%s[%d]' % (value, lane)