1"""ARM/NEON assembly emitter.
2
3Used by code generators to produce ARM assembly with NEON simd code.
4Provides tools for easier register management: named register variable
5allocation/deallocation, and offers a more procedural/structured approach
6to generating assembly.
7
8TODO: right now neon emitter prints out assembly instructions immediately,
9it might be beneficial to keep the whole structure and emit the assembly after
10applying some optimizations like: instruction reordering or register reuse.
11
TODO: the NeonRegisters object assigns explicit registers at allocation time.
Similarly to emitting code, register mapping and reuse could be performed and
optimized lazily.
15"""
16
17
class Error(Exception):
  """Base class for the errors defined in this module.

  Catching this type also catches the more specific error classes that
  derive from it.
  """
20
21
class RegisterAllocationError(Error):
  """Signals that a register could not be allocated.

  Raised with a message naming the kind of register that ran out.
  """
24
25
class LaneError(Error):
  """Signals that a wrong lane number was used."""
28
29
def Low(register):
  """Names the first 'd' register backing a quad register.

  Args:
    register: quad register name, i.e. 'q' followed by its decimal index.

  Returns:
    'd<2n>' for the quad register 'q<n>'.
  """
  assert register[0] == 'q'
  quad_index = int(register[1:])
  return 'd{0}'.format(2 * quad_index)
34
35
def High(register):
  """Names the second 'd' register backing a quad register.

  Args:
    register: quad register name, i.e. 'q' followed by its decimal index.

  Returns:
    'd<2n+1>' for the quad register 'q<n>'.
  """
  assert register[0] == 'q'
  quad_index = int(register[1:])
  return 'd{0}'.format(2 * quad_index + 1)
40
41
class NeonRegisters(object):
  """Utility that keeps track of used ARM/NEON registers.

  Registers are tracked by index in plain sets:
    double, general: indices that are currently allocated.
    double_ever, general_ever: every index handed out so far. FreeRegister()
      never removes entries from these; they feed the clobber list.
    parameters: names registered through MapParameter().

  A quad register q<n> is tracked as the two double registers d<2n> and
  d<2n+1>, so quad and double allocations share the same pool.
  """

  def __init__(self):
    self.double = set()
    self.double_ever = set()
    self.general = set()
    self.general_ever = set()
    self.parameters = set()

  def MapParameter(self, parameter):
    """Records a named parameter and returns its '%[name]' reference."""
    self.parameters.add(parameter)
    return '%%[%s]' % parameter

  def DoubleRegister(self, min_val=0):
    """Allocates the lowest free double register with index >= min_val.

    Args:
      min_val: first register index to consider.

    Returns:
      The register name, 'd<index>'.

    Raises:
      RegisterAllocationError: no index in [min_val, 32) is free.
    """
    for i in range(min_val, 32):
      if i not in self.double:
        self.double.add(i)
        self.double_ever.add(i)
        return 'd%d' % i
    raise RegisterAllocationError('Not enough double registers.')

  def QuadRegister(self, min_val=0):
    """Allocates the lowest free quad register with index >= min_val.

    A quad register is free only when both of its double halves are free.

    Args:
      min_val: first quad register index to consider.

    Returns:
      The register name, 'q<index>'.

    Raises:
      RegisterAllocationError: no index in [min_val, 16) is free.
    """
    for i in range(min_val, 16):
      low, high = i * 2, i * 2 + 1
      if low not in self.double and high not in self.double:
        self.double.update((low, high))
        self.double_ever.update((low, high))
        return 'q%d' % i
    raise RegisterAllocationError('Not enough quad registers.')

  def GeneralRegister(self):
    """Allocates the lowest free general purpose register.

    Returns:
      The register name, 'r<index>'.

    Raises:
      RegisterAllocationError: all sixteen indices are in use.
    """
    # NOTE(review): indices 13-15 are sp/lr/pc on ARM and are handed out like
    # any other register once r0-r12 are taken -- confirm callers never
    # allocate that many general registers.
    for i in range(0, 16):
      if i not in self.general:
        self.general.add(i)
        self.general_ever.add(i)
        return 'r%d' % i
    raise RegisterAllocationError('Not enough general registers.')

  def MappedParameters(self):
    """Returns the mapped parameter names as a sorted list.

    Sorting makes the result independent of set iteration order, so the
    generated operand lists are reproducible from run to run.
    """
    return sorted(self.parameters)

  def Clobbers(self):
    """Returns names of all registers ever allocated: general, then double.

    Both groups are listed in ascending index order.
    """
    general = ['r%d' % i for i in sorted(self.general_ever)]
    double = ['d%d' % i for i in self.DoubleClobbers()]
    return general + double

  def DoubleClobbers(self):
    """Returns the sorted indices of all double registers ever allocated."""
    return sorted(self.double_ever)

  def Low(self, register):
    """Returns the first double register of quad `register` (see Low())."""
    return Low(register)

  def High(self, register):
    """Returns the second double register of quad `register` (see High())."""
    return High(register)

  def FreeRegister(self, register):
    """Marks a previously allocated register as available again.

    The register stays in the *_ever sets, so it is still reported by
    Clobbers().

    Args:
      register: register name as returned by one of the allocation methods
        ('r<n>', 'd<n>' or 'q<n>').

    Raises:
      RegisterAllocationError: the name does not start with 'r', 'd' or 'q'.
    """
    assert len(register) > 1
    kind = register[0]
    num = int(register[1:])

    if kind == 'r':
      assert num in self.general
      self.general.remove(num)
    elif kind == 'd':
      assert num in self.double
      self.double.remove(num)
    elif kind == 'q':
      # Check both halves before touching either one so that a failed
      # assertion leaves the bookkeeping unchanged.
      assert num * 2 in self.double
      assert num * 2 + 1 in self.double
      self.double.remove(num * 2)
      self.double.remove(num * 2 + 1)
    else:
      # Uses the module's register error so that the failure can be caught
      # through Error like the allocation failures.
      raise RegisterAllocationError('Register not allocated: %s' % register)
116
117
class NeonEmitter(object):
  """Emits ARM/NEON assembly opcodes.

  Every Emit* method writes its text to standard output immediately.
  Instructions are written as quoted string literals that end in a literal
  backslash-n sequence, prefixed with the current indentation. `ops` counts
  how many times each opcode was emitted since the last ClearCounters().

  All printing uses a single parenthesized argument, which is valid both as a
  Python 2 print statement and as a Python 3 print() call.
  """

  def __init__(self, debug=False):
    """Creates an emitter.

    Args:
      debug: when true, EmitAssert() emits assert(...) calls; otherwise
        EmitAssert() emits nothing.
    """
    self.ops = {}
    self.indent = ''
    self.debug = debug

  def PushIndent(self):
    """Increases the indentation by two spaces."""
    self.indent += '  '

  def PopIndent(self):
    """Decreases the indentation by two characters."""
    self.indent = self.indent[:-2]

  def EmitIndented(self, what):
    """Prints `what` prefixed with the current indentation."""
    print(self.indent + what)

  def PushOp(self, op):
    """Increments the emission counter of opcode `op`."""
    self.ops[op] = self.ops.get(op, 0) + 1

  def ClearCounters(self):
    """Resets all opcode counters."""
    self.ops.clear()

  def EmitNewline(self):
    """Prints an empty line."""
    print('')

  def EmitPreprocessor1(self, op, param):
    """Prints the unindented preprocessor directive '#<op> <param>'."""
    print('#%s %s' % (op, param))

  def EmitPreprocessor(self, op):
    """Prints the unindented preprocessor directive '#<op>'."""
    print('#%s' % op)

  def EmitInclude(self, include):
    """Prints '#include <include>'; `include` carries its own delimiters."""
    self.EmitPreprocessor1('include', include)

  def EmitCall1(self, function, param):
    """Emits the single-argument call statement '<function>(<param>);'."""
    self.EmitIndented('%s(%s);' % (function, param))

  def EmitAssert(self, assert_expression):
    """Emits 'assert(<assert_expression>);' when debug is enabled."""
    if self.debug:
      self.EmitCall1('assert', assert_expression)

  def EmitHeaderBegin(self, header_name, includes):
    """Emits the opening include guard followed by the include lines.

    Args:
      header_name: base name of the guard macro; '_H_' is appended and the
        result is upper-cased.
      includes: iterable of include targets, or a false value for none.
    """
    guard = (header_name + '_H_').upper()
    self.EmitPreprocessor1('ifndef', guard)
    self.EmitPreprocessor1('define', guard)
    self.EmitNewline()
    if includes:
      for include in includes:
        self.EmitInclude(include)
      self.EmitNewline()

  def EmitHeaderEnd(self):
    """Emits the '#endif' closing the include guard."""
    self.EmitPreprocessor('endif')

  def EmitCode(self, code):
    """Emits `code` followed by a semicolon."""
    self.EmitIndented('%s;' % code)

  def EmitFunctionBeginA(self, function_name, params, return_type):
    """Emits a function header with an opening brace and indents the body.

    Args:
      function_name: name of the function.
      params: iterable of (type, name) pairs.
      return_type: return type of the function.
    """
    param_list = ', '.join(['%s %s' % (t, n) for (t, n) in params])
    self.EmitIndented('%s %s(%s) {' % (return_type, function_name, param_list))
    self.PushIndent()

  def EmitFunctionEnd(self):
    """Unindents and emits the closing brace of a function."""
    self.PopIndent()
    self.EmitIndented('}')

  def EmitAsmBegin(self):
    """Emits 'asm volatile(' and indents the statements that follow."""
    self.EmitIndented('asm volatile(')
    self.PushIndent()

  def EmitAsmMapping(self, elements, modifier):
    """Emits one ':'-prefixed operand list of an asm statement.

    Each element d is written as '[d] "<modifier>"(d)'. An empty list still
    emits the bare ':' separator.
    """
    if elements:
      self.EmitIndented(': ' + ', '.join(['[%s] "%s"(%s)' % (d, modifier, d)
                                          for d in elements]))
    else:
      self.EmitIndented(':')

  def EmitClobbers(self, elements):
    """Emits the ':'-prefixed list of quoted clobber names (or a bare ':')."""
    if elements:
      self.EmitIndented(': ' + ', '.join(['"%s"' % c for c in elements]))
    else:
      self.EmitIndented(':')

  def EmitAsmEnd(self, outputs, inputs, clobbers):
    """Emits the operand and clobber lists and closes the asm statement.

    Args:
      outputs: names emitted with the '+r' constraint.
      inputs: names emitted with the 'r' constraint.
      clobbers: names emitted as the clobber list.
    """
    self.EmitAsmMapping(outputs, '+r')
    self.EmitAsmMapping(inputs, 'r')
    self.EmitClobbers(clobbers)
    self.PopIndent()
    self.EmitIndented(');')

  def EmitComment(self, comment):
    """Emits a '// ' line comment."""
    self.EmitIndented('// ' + comment)

  def EmitNumericalLabel(self, label):
    """Emits the quoted numeric label '<label>:' (no trailing newline escape)."""
    self.EmitIndented('"%d:"' % label)

  def EmitOp1(self, op, param1):
    """Counts `op` and emits it with one operand."""
    self.PushOp(op)
    self.EmitIndented('"%s %s\\n"' % (op, param1))

  def EmitOp2(self, op, param1, param2):
    """Counts `op` and emits it with two comma-separated operands."""
    self.PushOp(op)
    self.EmitIndented('"%s %s, %s\\n"' % (op, param1, param2))

  def EmitOp3(self, op, param1, param2, param3):
    """Counts `op` and emits it with three comma-separated operands."""
    self.PushOp(op)
    self.EmitIndented('"%s %s, %s, %s\\n"' % (op, param1, param2, param3))

  def EmitZip(self, size, param1, param2):
    """Emits 'vzip.<size> param1, param2'; `size` is formatted with %d."""
    self.EmitOp2('vzip.%d' % size, param1, param2)

  def EmitZip8(self, param1, param2):
    """Emits 'vzip.8 param1, param2'."""
    self.EmitZip(8, param1, param2)

  def EmitZip16(self, param1, param2):
    """Emits 'vzip.16 param1, param2'."""
    self.EmitZip(16, param1, param2)

  def EmitZip32(self, param1, param2):
    """Emits 'vzip.32 param1, param2'."""
    self.EmitZip(32, param1, param2)

  def EmitAdd(self, destination, source, param):
    """Emits 'add destination, source, param'."""
    self.EmitOp3('add', destination, source, param)

  def EmitSubs(self, destination, source, param):
    """Emits 'subs destination, source, param'."""
    self.EmitOp3('subs', destination, source, param)

  def EmitSub(self, destination, source, param):
    """Emits 'sub destination, source, param'."""
    self.EmitOp3('sub', destination, source, param)

  def EmitMul(self, destination, source, param):
    """Emits 'mul destination, source, param'."""
    self.EmitOp3('mul', destination, source, param)

  def EmitMov(self, param1, param2):
    """Emits 'mov param1, param2'."""
    self.EmitOp2('mov', param1, param2)

  def EmitSkip(self, register, skip, stride):
    """Emits an 'add' advancing `register` by the constant skip * stride."""
    self.EmitOp3('add', register, register, '#%d' % (skip * stride))

  def EmitBeqBack(self, label):
    """Emits 'beq <label>b' (numeric label with the 'b' suffix)."""
    self.EmitOp1('beq', '%db' % label)

  def EmitBeqFront(self, label):
    """Emits 'beq <label>f' (numeric label with the 'f' suffix)."""
    self.EmitOp1('beq', '%df' % label)

  def EmitBneBack(self, label):
    """Emits 'bne <label>b' (numeric label with the 'b' suffix)."""
    self.EmitOp1('bne', '%db' % label)

  def EmitBneFront(self, label):
    """Emits 'bne <label>f' (numeric label with the 'f' suffix)."""
    self.EmitOp1('bne', '%df' % label)

  def EmitVAdd(self, add_type, destination, source_1, source_2):
    """Emits 'vadd.<add_type> destination, source_1, source_2'."""
    self.EmitOp3('vadd.%s' % add_type, destination, source_1, source_2)

  def EmitVAddw(self, add_type, destination, source_1, source_2):
    """Emits 'vaddw.<add_type> destination, source_1, source_2'."""
    self.EmitOp3('vaddw.%s' % add_type, destination, source_1, source_2)

  def EmitVCvt(self, cvt_to, cvt_from, destination, source):
    """Emits 'vcvt.<cvt_to>.<cvt_from> destination, source'."""
    self.EmitOp2('vcvt.%s.%s' % (cvt_to, cvt_from), destination, source)

  def EmitVDup(self, dup_type, destination, source):
    """Emits 'vdup.<dup_type> destination, source'."""
    self.EmitOp2('vdup.%s' % dup_type, destination, source)

  def EmitVMov(self, mov_type, destination, source):
    """Emits 'vmov.<mov_type> destination, source'."""
    self.EmitOp2('vmov.%s' % mov_type, destination, source)

  def EmitVQmovn(self, mov_type, destination, source):
    """Emits 'vqmovn.<mov_type> destination, source'."""
    self.EmitOp2('vqmovn.%s' % mov_type, destination, source)

  def EmitVQmovun(self, mov_type, destination, source):
    """Emits 'vqmovun.<mov_type> destination, source'."""
    self.EmitOp2('vqmovun.%s' % mov_type, destination, source)

  def EmitVMul(self, mul_type, destination, source_1, source_2):
    """Emits 'vmul.<mul_type> destination, source_1, source_2'."""
    self.EmitOp3('vmul.%s' % mul_type, destination, source_1, source_2)

  def EmitVMull(self, mul_type, destination, source_1, source_2):
    """Emits 'vmull.<mul_type> destination, source_1, source_2'."""
    self.EmitOp3('vmull.%s' % mul_type, destination, source_1, source_2)

  def EmitVPadd(self, add_type, destination, source_1, source_2):
    """Emits 'vpadd.<add_type> destination, source_1, source_2'."""
    self.EmitOp3('vpadd.%s' % add_type, destination, source_1, source_2)

  def EmitVPaddl(self, add_type, destination, source):
    """Emits 'vpaddl.<add_type> destination, source'."""
    self.EmitOp2('vpaddl.%s' % add_type, destination, source)

  def EmitVPadal(self, add_type, destination, source):
    """Emits 'vpadal.<add_type> destination, source'."""
    self.EmitOp2('vpadal.%s' % add_type, destination, source)

  def EmitVLoad(self, load_type, destination, source):
    """Emits 'vld<load_type> {destination}, source'."""
    self.EmitOp2('vld%s' % load_type, '{%s}' % destination, '%s' % source)

  def EmitVLoadA(self, load_type, destinations, source):
    """Like EmitVLoad(), with a list of destination registers."""
    self.EmitVLoad(load_type, ', '.join(destinations), source)

  def EmitPld(self, load_address_register):
    """Emits 'pld [load_address_register]'."""
    self.EmitOp1('pld', '[%s]' % load_address_register)

  def EmitPldOffset(self, load_address_register, offset):
    """Emits 'pld [load_address_register, offset]'."""
    self.EmitOp1('pld', '[%s, %s]' % (load_address_register, offset))

  def EmitInstructionPreload(self, label):
    """Emits 'pli <label>'."""
    self.EmitOp1('pli', label)

  def EmitVShl(self, shift_type, destination, source, shift):
    """Emits 'vshl.<shift_type> destination, source, shift'."""
    self.EmitOp3('vshl.%s' % shift_type, destination, source, shift)

  def EmitVStore(self, store_type, source, destination):
    """Emits 'vst<store_type> {source}, destination'."""
    self.EmitOp2('vst%s' % store_type, '{%s}' % source, destination)

  def EmitVStoreA(self, store_type, sources, destination):
    """Like EmitVStore(), with a list of source registers."""
    self.EmitVStore(store_type, ', '.join(sources), destination)

  def EmitVStoreOffset(self, store_type, source, destination, offset):
    """Emits 'vst<store_type> {source}, destination, offset'."""
    self.EmitOp3('vst%s' % store_type, '{%s}' % source, destination, offset)

  def Dereference(self, value, alignment):
    """Returns '[value]', or '[value:alignment]' when alignment is truthy."""
    if alignment:
      return '[%s:%d]' % (value, alignment)
    else:
      return '[%s]' % value

  def DereferenceIncrement(self, value, alignment):
    """Returns Dereference(value, alignment) with a trailing '!'."""
    return '%s!' % self.Dereference(value, alignment)

  def ImmediateConstant(self, value):
    """Returns the immediate operand '#<value>'; formatted with %d."""
    return '#%d' % value

  def AllLanes(self, value):
    """Returns '<value>[]'."""
    return '%s[]' % value

  def Lane(self, value, lane):
    """Returns '<value>[<lane>]'; `lane` is formatted with %d, unchecked."""
    return '%s[%d]' % (value, lane)
352