1# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
2--- |
3  define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads two halves and one float from undef
    ; pointers, adds 0xH3C00 to the first half and 1.0 to the float, and
    ; volatile-stores both sums. %f16.val1 is loaded but has no other use.
4    %f16.val0 = load volatile half, half addrspace(1)* undef
5    %f16.val1 = load volatile half, half addrspace(1)* undef
6    %f32.val = load volatile float, float addrspace(1)* undef
7    %f16.add0 = fadd half %f16.val0, 0xH3C00
8    %f32.add = fadd float %f32.val, 1.000000e+00
9    store volatile half %f16.add0, half addrspace(1)* undef
10    store volatile float %f32.add, float addrspace(1)* undef
11    ret void
12  }
13
14  define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads two halves and one float from undef
    ; pointers, adds 0xH3C00 to the first half and 1.0 to the float, and
    ; volatile-stores both sums. %f16.val1 is loaded but has no other use.
15    %f16.val0 = load volatile half, half addrspace(1)* undef
16    %f16.val1 = load volatile half, half addrspace(1)* undef
17    %f32.val = load volatile float, float addrspace(1)* undef
18    %f16.add0 = fadd half %f16.val0, 0xH3C00
19    %f32.add = fadd float %f32.val, 1.000000e+00
20    store volatile half %f16.add0, half addrspace(1)* undef
21    store volatile float %f32.add, float addrspace(1)* undef
22    ret void
23  }
24
25  define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads two halves and one float from undef
    ; pointers, adds 0xH3C00 to the first half and 1.0 to the float, and
    ; volatile-stores both sums. %f16.val1 is loaded but has no other use.
26    %f16.val0 = load volatile half, half addrspace(1)* undef
27    %f16.val1 = load volatile half, half addrspace(1)* undef
28    %f32.val = load volatile float, float addrspace(1)* undef
29    %f16.add0 = fadd half %f16.val0, 0xH3C00
30    %f32.add = fadd float %f32.val, 1.000000e+00
31    store volatile half %f16.add0, half addrspace(1)* undef
32    store volatile float %f32.add, float addrspace(1)* undef
33    ret void
34  }
35
36  define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads two halves and one float from undef
    ; pointers, adds 0xH3C00 to each half and 1.0 to the float, and
    ; volatile-stores all three sums.
37    %f16.val0 = load volatile half, half addrspace(1)* undef
38    %f16.val1 = load volatile half, half addrspace(1)* undef
39    %f32.val = load volatile float, float addrspace(1)* undef
40    %f16.add0 = fadd half %f16.val0, 0xH3C00
41    %f16.add1 = fadd half %f16.val1, 0xH3C00
42    %f32.add = fadd float %f32.val, 1.000000e+00
43    store volatile half %f16.add0, half addrspace(1)* undef
44    store volatile half %f16.add1, half addrspace(1)* undef
45    store volatile float %f32.add, float addrspace(1)* undef
46    ret void
47  }
48
49  define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads two halves from undef pointers, adds the
    ; half constant with bit pattern 0x0001 to each, and volatile-stores
    ; both sums.
50    %f16.val0 = load volatile half, half addrspace(1)* undef
51    %f16.val1 = load volatile half, half addrspace(1)* undef
52    %f16.add0 = fadd half %f16.val0, 0xH0001
53    %f16.add1 = fadd half %f16.val1, 0xH0001
54    store volatile half %f16.add0, half addrspace(1)* undef
55    store volatile half %f16.add1,half addrspace(1)* undef
56    ret void
57  }
58
59  define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads two halves and one float from undef
    ; pointers, adds the half constant with bit pattern 0xFFFE to each half
    ; and the hex-encoded float constant 0xffffffffc0000000 to the float,
    ; then volatile-stores all three sums.
60    %f16.val0 = load volatile half, half addrspace(1)* undef
61    %f16.val1 = load volatile half, half addrspace(1)* undef
62    %f32.val = load volatile float, float addrspace(1)* undef
63    %f16.add0 = fadd half %f16.val0, 0xHFFFE
64    %f16.add1 = fadd half %f16.val1, 0xHFFFE
65    %f32.add = fadd float %f32.val, 0xffffffffc0000000
66    store volatile half %f16.add0, half addrspace(1)* undef
67    store volatile half %f16.add1, half addrspace(1)* undef
68    store volatile float %f32.add, float addrspace(1)* undef
69    ret void
70  }
71
72  define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads three floats from undef pointers, adds 1.0
    ; to the first two, and volatile-stores both sums. %f32.val is loaded
    ; but has no other use.
73    %f32.val0 = load volatile float, float addrspace(1)* undef
74    %f32.val1 = load volatile float, float addrspace(1)* undef
75    %f32.val = load volatile float, float addrspace(1)* undef
76    %f32.add0 = fadd float %f32.val0, 1.0
77    %f32.add1 = fadd float %f32.val1, 1.0
78    store volatile float %f32.add0, float addrspace(1)* undef
79    store volatile float %f32.add1, float addrspace(1)* undef
80    ret void
81  }
82
83  define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads three halves from undef pointers; note that
    ; %f32.val and %f32.add are of type half despite their names. Adds
    ; 0xH3C00 to %f16.val0 and 1.0 to %f32.val and volatile-stores both
    ; sums. %f16.val1 is loaded but has no other use.
84    %f16.val0 = load volatile half, half addrspace(1)* undef
85    %f16.val1 = load volatile half, half addrspace(1)* undef
86    %f32.val = load volatile half, half addrspace(1)* undef
87    %f16.add0 = fadd half %f16.val0, 0xH3C00
88    %f32.add = fadd half %f32.val, 1.000000e+00
89    store volatile half %f16.add0, half addrspace(1)* undef
90    store volatile half %f32.add, half addrspace(1)* undef
91    ret void
92  }
93
94  define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
    ; IR stub sharing its name with the machine function defined later in
    ; this file. Volatile-loads three halves from undef pointers; note that
    ; %f32.val and %f32.add are of type half despite their names. Adds
    ; 0xH3C00 to %f16.val0 and 1.0 to %f32.val and volatile-stores both
    ; sums. %f16.val1 is loaded but has no other use.
95    %f16.val0 = load volatile half, half addrspace(1)* undef
96    %f16.val1 = load volatile half, half addrspace(1)* undef
97    %f32.val = load volatile half, half addrspace(1)* undef
98    %f16.add0 = fadd half %f16.val0, 0xH3C00
99    %f32.add = fadd half %f32.val, 1.000000e+00
100    store volatile half %f16.add0, half addrspace(1)* undef
101    store volatile half %f32.add, half addrspace(1)* undef
102    ret void
103  }
104
105  attributes #0 = { nounwind }
106
107...
108---
109
110# f32 1.0 with a single use should be folded as the low 32-bits of a
111#  literal constant.
112
113# CHECK-LABEL: name: add_f32_1.0_one_f16_use
114# CHECK: %13:vgpr_32 = V_ADD_F16_e32  1065353216, killed %11, implicit $exec
115
# Input machine function: %12 materializes 1065353216 (0x3F800000) with
# V_MOV_B32_e32 and has exactly one use, as the second source of a single
# V_ADD_F16_e64.
116name:            add_f32_1.0_one_f16_use
117alignment:       0
118exposesReturnsTwice: false
119legalized:       false
120regBankSelected: false
121selected:        false
122tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
123registers:
124  - { id: 0, class: sreg_64 }
125  - { id: 1, class: sreg_32 }
126  - { id: 2, class: sgpr_32 }
127  - { id: 3, class: vgpr_32 }
128  - { id: 4, class: sreg_64 }
129  - { id: 5, class: sreg_32 }
130  - { id: 6, class: sreg_64 }
131  - { id: 7, class: sreg_32 }
132  - { id: 8, class: sreg_32 }
133  - { id: 9, class: sreg_32 }
134  - { id: 10, class: sreg_128 }
135  - { id: 11, class: vgpr_32 }
136  - { id: 12, class: vgpr_32 }
137  - { id: 13, class: vgpr_32 }
138frameInfo:
139  isFrameAddressTaken: false
140  isReturnAddressTaken: false
141  hasStackMap:     false
142  hasPatchPoint:   false
143  stackSize:       0
144  offsetAdjustment: 0
145  maxAlignment:    0
146  adjustsStack:    false
147  hasCalls:        false
148  maxCallFrameSize: 0
149  hasOpaqueSPAdjustment: false
150  hasVAStart:      false
151  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of the buffer load and store. %11 is a 2-byte
# volatile load; %13 = %11 + %12 is stored back as a short.
152body:             |
153  bb.0 (%ir-block.0):
154    %4 = IMPLICIT_DEF
155    %5 = COPY %4.sub1
156    %6 = IMPLICIT_DEF
157    %7 = COPY %6.sub0
158    %8 = S_MOV_B32 61440
159    %9 = S_MOV_B32 -1
160    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
161    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
162    %12 = V_MOV_B32_e32 1065353216, implicit $exec
163    %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
164    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
165    S_ENDPGM
166
167...
168---
169# Materialized f32 inline immediate should not be folded into the f16
170# operands
171
172# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
173# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
174# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $exec
175# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $exec
176
177
# Input machine function: %13 materializes 1065353216 (0x3F800000) with
# V_MOV_B32_e32 and is the second source of two V_ADD_F16_e64 instructions
# (the second use carries the kill flag).
178name:            add_f32_1.0_multi_f16_use
179alignment:       0
180exposesReturnsTwice: false
181legalized:       false
182regBankSelected: false
183selected:        false
184tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
185registers:
186  - { id: 0, class: sreg_64 }
187  - { id: 1, class: sreg_32 }
188  - { id: 2, class: sgpr_32 }
189  - { id: 3, class: vgpr_32 }
190  - { id: 4, class: sreg_64 }
191  - { id: 5, class: sreg_32 }
192  - { id: 6, class: sreg_64 }
193  - { id: 7, class: sreg_32 }
194  - { id: 8, class: sreg_32 }
195  - { id: 9, class: sreg_32 }
196  - { id: 10, class: sreg_128 }
197  - { id: 11, class: vgpr_32 }
198  - { id: 12, class: vgpr_32 }
199  - { id: 13, class: vgpr_32 }
200  - { id: 14, class: vgpr_32 }
201  - { id: 15, class: vgpr_32 }
202frameInfo:
203  isFrameAddressTaken: false
204  isReturnAddressTaken: false
205  hasStackMap:     false
206  hasPatchPoint:   false
207  stackSize:       0
208  offsetAdjustment: 0
209  maxAlignment:    0
210  adjustsStack:    false
211  hasCalls:        false
212  maxCallFrameSize: 0
213  hasOpaqueSPAdjustment: false
214  hasVAStart:      false
215  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 is a 2-byte
# load; %12 is a 4-byte BUFFER_LOAD_DWORD_OFFSET result that is nevertheless
# consumed by a V_ADD_F16_e64. Both sums are stored as shorts.
216body:             |
217  bb.0 (%ir-block.0):
218    %4 = IMPLICIT_DEF
219    %5 = COPY %4.sub1
220    %6 = IMPLICIT_DEF
221    %7 = COPY %6.sub0
222    %8 = S_MOV_B32 61440
223    %9 = S_MOV_B32 -1
224    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
225    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
226    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
227    %13 = V_MOV_B32_e32 1065353216, implicit $exec
228    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
229    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
230    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
231    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
232    S_ENDPGM
233
234...
235---
236
237# f32 1.0 should be folded into the single f32 use as an inline
238#  immediate, and folded into the single f16 use as a literal constant
239
240# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
241# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $exec
242# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec
243
# Input machine function: %14 materializes 1065353216 (0x3F800000) with
# V_MOV_B32_e32 and has two uses: one V_ADD_F16_e64 and one V_ADD_F32_e64.
244name:            add_f32_1.0_one_f32_use_one_f16_use
245alignment:       0
246exposesReturnsTwice: false
247legalized:       false
248regBankSelected: false
249selected:        false
250tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
251registers:
252  - { id: 0, class: sreg_64 }
253  - { id: 1, class: sreg_32 }
254  - { id: 2, class: sgpr_32 }
255  - { id: 3, class: vgpr_32 }
256  - { id: 4, class: sreg_64 }
257  - { id: 5, class: sreg_32 }
258  - { id: 6, class: sreg_64 }
259  - { id: 7, class: sreg_32 }
260  - { id: 8, class: sreg_32 }
261  - { id: 9, class: sreg_32 }
262  - { id: 10, class: sreg_128 }
263  - { id: 11, class: vgpr_32 }
264  - { id: 12, class: vgpr_32 }
265  - { id: 13, class: vgpr_32 }
266  - { id: 14, class: vgpr_32 }
267  - { id: 15, class: vgpr_32 }
268  - { id: 16, class: vgpr_32 }
269frameInfo:
270  isFrameAddressTaken: false
271  isReturnAddressTaken: false
272  hasStackMap:     false
273  hasPatchPoint:   false
274  stackSize:       0
275  offsetAdjustment: 0
276  maxAlignment:    0
277  adjustsStack:    false
278  hasCalls:        false
279  maxCallFrameSize: 0
280  hasOpaqueSPAdjustment: false
281  hasVAStart:      false
282  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 and %12 are
# 2-byte loads (%12 is not used afterwards); %13 is a 4-byte load. %15 is the
# f16 sum stored as a short, %16 the f32 sum stored as a dword.
283body:             |
284  bb.0 (%ir-block.0):
285    %4 = IMPLICIT_DEF
286    %5 = COPY %4.sub1
287    %6 = IMPLICIT_DEF
288    %7 = COPY %6.sub0
289    %8 = S_MOV_B32 61440
290    %9 = S_MOV_B32 -1
291    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
292    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
293    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
294    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
295    %14 = V_MOV_B32_e32 1065353216, implicit $exec
296    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
297    %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
298    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
299    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
300    S_ENDPGM
301
302...
303---
304
305# f32 1.0 should be folded for the single f32 use as an inline
306#  constant, and not folded as a multi-use literal for the f16 cases
307
308# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
309# CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
310# CHECK: %15:vgpr_32 = V_ADD_F16_e32  %11, %14, implicit $exec
311# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12,  %14, implicit $exec
312# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec
313
# Input machine function: %14 materializes 1065353216 (0x3F800000) with
# V_MOV_B32_e32 and has three uses: two V_ADD_F16_e64 and one V_ADD_F32_e64.
314name:            add_f32_1.0_one_f32_use_multi_f16_use
315alignment:       0
316exposesReturnsTwice: false
317legalized:       false
318regBankSelected: false
319selected:        false
320tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
321registers:
322  - { id: 0, class: sreg_64 }
323  - { id: 1, class: sreg_32 }
324  - { id: 2, class: sgpr_32 }
325  - { id: 3, class: vgpr_32 }
326  - { id: 4, class: sreg_64 }
327  - { id: 5, class: sreg_32 }
328  - { id: 6, class: sreg_64 }
329  - { id: 7, class: sreg_32 }
330  - { id: 8, class: sreg_32 }
331  - { id: 9, class: sreg_32 }
332  - { id: 10, class: sreg_128 }
333  - { id: 11, class: vgpr_32 }
334  - { id: 12, class: vgpr_32 }
335  - { id: 13, class: vgpr_32 }
336  - { id: 14, class: vgpr_32 }
337  - { id: 15, class: vgpr_32 }
338  - { id: 16, class: vgpr_32 }
339  - { id: 17, class: vgpr_32 }
340frameInfo:
341  isFrameAddressTaken: false
342  isReturnAddressTaken: false
343  hasStackMap:     false
344  hasPatchPoint:   false
345  stackSize:       0
346  offsetAdjustment: 0
347  maxAlignment:    0
348  adjustsStack:    false
349  hasCalls:        false
350  maxCallFrameSize: 0
351  hasOpaqueSPAdjustment: false
352  hasVAStart:      false
353  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 and %12 are
# 2-byte loads feeding the two f16 adds; %13 is a 4-byte load feeding the f32
# add. The f16 sums are stored as shorts, the f32 sum as a dword.
354body:             |
355  bb.0 (%ir-block.0):
356    %4 = IMPLICIT_DEF
357    %5 = COPY %4.sub1
358    %6 = IMPLICIT_DEF
359    %7 = COPY %6.sub0
360    %8 = S_MOV_B32 61440
361    %9 = S_MOV_B32 -1
362    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
363    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
364    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
365    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
366    %14 = V_MOV_B32_e32 1065353216, implicit $exec
367    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
368    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
369    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
370    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
371    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
372    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
373    S_ENDPGM
374
375...
376---
377# CHECK-LABEL: name: add_i32_1_multi_f16_use
378# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
379# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $exec
380# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $exec
381
382
# Input machine function: %13 materializes the integer 1 with V_MOV_B32_e32
# and is the second source of two V_ADD_F16_e64 instructions. The expected
# output above this document shows the constant 1 appearing directly as an
# operand of both adds.
383name:            add_i32_1_multi_f16_use
384alignment:       0
385exposesReturnsTwice: false
386legalized:       false
387regBankSelected: false
388selected:        false
389tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
390registers:
391  - { id: 0, class: sreg_64 }
392  - { id: 1, class: sreg_32 }
393  - { id: 2, class: sgpr_32 }
394  - { id: 3, class: vgpr_32 }
395  - { id: 4, class: sreg_64 }
396  - { id: 5, class: sreg_32 }
397  - { id: 6, class: sreg_64 }
398  - { id: 7, class: sreg_32 }
399  - { id: 8, class: sreg_32 }
400  - { id: 9, class: sreg_32 }
401  - { id: 10, class: sreg_128 }
402  - { id: 11, class: vgpr_32 }
403  - { id: 12, class: vgpr_32 }
404  - { id: 13, class: vgpr_32 }
405  - { id: 14, class: vgpr_32 }
406  - { id: 15, class: vgpr_32 }
407frameInfo:
408  isFrameAddressTaken: false
409  isReturnAddressTaken: false
410  hasStackMap:     false
411  hasPatchPoint:   false
412  stackSize:       0
413  offsetAdjustment: 0
414  maxAlignment:    0
415  adjustsStack:    false
416  hasCalls:        false
417  maxCallFrameSize: 0
418  hasOpaqueSPAdjustment: false
419  hasVAStart:      false
420  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 is a 2-byte
# load; %12 is a 4-byte BUFFER_LOAD_DWORD_OFFSET result that is nevertheless
# consumed by a V_ADD_F16_e64. Both sums are stored as shorts.
421body:             |
422  bb.0 (%ir-block.0):
423    %4 = IMPLICIT_DEF
424    %5 = COPY %4.sub1
425    %6 = IMPLICIT_DEF
426    %7 = COPY %6.sub0
427    %8 = S_MOV_B32 61440
428    %9 = S_MOV_B32 -1
429    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
430    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
431    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
432    %13 = V_MOV_B32_e32 1, implicit $exec
433    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
434    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
435    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
436    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
437    S_ENDPGM
438
439...
440---
441
442# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
443# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
444# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $exec
445# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $exec
446# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $exec
447
# Input machine function: %14 materializes the integer -2 with V_MOV_B32_e32
# and has three uses: two V_ADD_F16_e64 and one V_ADD_F32_e64. The expected
# output above this document shows -2 appearing directly as an operand of
# all three adds.
448name:            add_i32_m2_one_f32_use_multi_f16_use
449alignment:       0
450exposesReturnsTwice: false
451legalized:       false
452regBankSelected: false
453selected:        false
454tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
455registers:
456  - { id: 0, class: sreg_64 }
457  - { id: 1, class: sreg_32 }
458  - { id: 2, class: sgpr_32 }
459  - { id: 3, class: vgpr_32 }
460  - { id: 4, class: sreg_64 }
461  - { id: 5, class: sreg_32 }
462  - { id: 6, class: sreg_64 }
463  - { id: 7, class: sreg_32 }
464  - { id: 8, class: sreg_32 }
465  - { id: 9, class: sreg_32 }
466  - { id: 10, class: sreg_128 }
467  - { id: 11, class: vgpr_32 }
468  - { id: 12, class: vgpr_32 }
469  - { id: 13, class: vgpr_32 }
470  - { id: 14, class: vgpr_32 }
471  - { id: 15, class: vgpr_32 }
472  - { id: 16, class: vgpr_32 }
473  - { id: 17, class: vgpr_32 }
474frameInfo:
475  isFrameAddressTaken: false
476  isReturnAddressTaken: false
477  hasStackMap:     false
478  hasPatchPoint:   false
479  stackSize:       0
480  offsetAdjustment: 0
481  maxAlignment:    0
482  adjustsStack:    false
483  hasCalls:        false
484  maxCallFrameSize: 0
485  hasOpaqueSPAdjustment: false
486  hasVAStart:      false
487  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 and %12 are
# 2-byte loads feeding the two f16 adds; %13 is a 4-byte load feeding the f32
# add. The f16 sums are stored as shorts, the f32 sum as a dword.
488body:             |
489  bb.0 (%ir-block.0):
490    %4 = IMPLICIT_DEF
491    %5 = COPY %4.sub1
492    %6 = IMPLICIT_DEF
493    %7 = COPY %6.sub0
494    %8 = S_MOV_B32 61440
495    %9 = S_MOV_B32 -1
496    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
497    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
498    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
499    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
500    %14 = V_MOV_B32_e32 -2, implicit $exec
501    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
502    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
503    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
504    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
505    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
506    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
507    S_ENDPGM
508
509...
510---
511
512# f16 1.0 (15360) has only f32 uses here, and more than one of them; it
513#  should stay materialized and not be folded into the f32 adds
514
515# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
516# CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec
517# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
518# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $exec
519
# Input machine function: %13 materializes 15360 (0x3C00, the f16 1.0
# pattern referred to by the function name) with V_MOV_B32_e32 and is the
# second source of two V_ADD_F32_e64 instructions.
520name:            add_f16_1.0_multi_f32_use
521alignment:       0
522exposesReturnsTwice: false
523legalized:       false
524regBankSelected: false
525selected:        false
526tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
527registers:
528  - { id: 0, class: sreg_64 }
529  - { id: 1, class: sreg_32 }
530  - { id: 2, class: sgpr_32 }
531  - { id: 3, class: vgpr_32 }
532  - { id: 4, class: sreg_64 }
533  - { id: 5, class: sreg_32 }
534  - { id: 6, class: sreg_64 }
535  - { id: 7, class: sreg_32 }
536  - { id: 8, class: sreg_32 }
537  - { id: 9, class: sreg_32 }
538  - { id: 10, class: sreg_128 }
539  - { id: 11, class: vgpr_32 }
540  - { id: 12, class: vgpr_32 }
541  - { id: 13, class: vgpr_32 }
542  - { id: 14, class: vgpr_32 }
543  - { id: 15, class: vgpr_32 }
544frameInfo:
545  isFrameAddressTaken: false
546  isReturnAddressTaken: false
547  hasStackMap:     false
548  hasPatchPoint:   false
549  stackSize:       0
550  offsetAdjustment: 0
551  maxAlignment:    0
552  adjustsStack:    false
553  hasCalls:        false
554  maxCallFrameSize: 0
555  hasOpaqueSPAdjustment: false
556  hasVAStart:      false
557  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 and %12 are
# 4-byte loads feeding the two f32 adds; both sums are stored as dwords.
558body:             |
559  bb.0 (%ir-block.0):
560    %4 = IMPLICIT_DEF
561    %5 = COPY %4.sub1
562    %6 = IMPLICIT_DEF
563    %7 = COPY %6.sub0
564    %8 = S_MOV_B32 61440
565    %9 = S_MOV_B32 -1
566    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
567    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
568    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
569    %13 = V_MOV_B32_e32 15360, implicit $exec
570    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
571    %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
572    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
573    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
574    S_ENDPGM
575
576...
577---
578
579# The low 16-bits are an inline immediate, but the high bits are junk
580# FIXME: Should be able to fold this
581
582# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
583# CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec
584# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $exec
585# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec
586
# Input machine function: %13 materializes 80886784 (0x04D23C00: low 16
# bits 0x3C00, high 16 bits 0x04D2) with V_MOV_B32_e32 and is the second
# source of two V_ADD_F16_e64 instructions.
587name:            add_f16_1.0_other_high_bits_multi_f16_use
588alignment:       0
589exposesReturnsTwice: false
590legalized:       false
591regBankSelected: false
592selected:        false
593tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
594registers:
595  - { id: 0, class: sreg_64 }
596  - { id: 1, class: sreg_32 }
597  - { id: 2, class: sgpr_32 }
598  - { id: 3, class: vgpr_32 }
599  - { id: 4, class: sreg_64 }
600  - { id: 5, class: sreg_32 }
601  - { id: 6, class: sreg_64 }
602  - { id: 7, class: sreg_32 }
603  - { id: 8, class: sreg_32 }
604  - { id: 9, class: sreg_32 }
605  - { id: 10, class: sreg_128 }
606  - { id: 11, class: vgpr_32 }
607  - { id: 12, class: vgpr_32 }
608  - { id: 13, class: vgpr_32 }
609  - { id: 14, class: vgpr_32 }
610  - { id: 15, class: vgpr_32 }
611frameInfo:
612  isFrameAddressTaken: false
613  isReturnAddressTaken: false
614  hasStackMap:     false
615  hasPatchPoint:   false
616  stackSize:       0
617  offsetAdjustment: 0
618  maxAlignment:    0
619  adjustsStack:    false
620  hasCalls:        false
621  maxCallFrameSize: 0
622  hasOpaqueSPAdjustment: false
623  hasVAStart:      false
624  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 and %12 are
# 2-byte loads feeding the two f16 adds; both sums are stored as shorts.
625body:             |
626  bb.0 (%ir-block.0):
627    %4 = IMPLICIT_DEF
628    %5 = COPY %4.sub1
629    %6 = IMPLICIT_DEF
630    %7 = COPY %6.sub0
631    %8 = S_MOV_B32 61440
632    %9 = S_MOV_B32 -1
633    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
634    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
635    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
636    %13 = V_MOV_B32_e32 80886784, implicit $exec
637    %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
638    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
639    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
640    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
641    S_ENDPGM
642
643...
644---
645
646# FIXME: Should fold inline immediate into f16 and literal use into
647# f32 instruction.
648
649# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
650# CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec
651# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
652# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec
# Input machine function: %13 materializes 305413120 (0x12343C00: low 16
# bits 0x3C00, high 16 bits 0x1234) with V_MOV_B32_e32 and has two uses: one
# V_ADD_F32_e64 and one V_ADD_F16_e64.
653name:            add_f16_1.0_other_high_bits_use_f16_f32
654alignment:       0
655exposesReturnsTwice: false
656legalized:       false
657regBankSelected: false
658selected:        false
659tracksRegLiveness: true
# Virtual registers %0-%3 are declared here but never referenced in the body.
660registers:
661  - { id: 0, class: sreg_64 }
662  - { id: 1, class: sreg_32 }
663  - { id: 2, class: sgpr_32 }
664  - { id: 3, class: vgpr_32 }
665  - { id: 4, class: sreg_64 }
666  - { id: 5, class: sreg_32 }
667  - { id: 6, class: sreg_64 }
668  - { id: 7, class: sreg_32 }
669  - { id: 8, class: sreg_32 }
670  - { id: 9, class: sreg_32 }
671  - { id: 10, class: sreg_128 }
672  - { id: 11, class: vgpr_32 }
673  - { id: 12, class: vgpr_32 }
674  - { id: 13, class: vgpr_32 }
675  - { id: 14, class: vgpr_32 }
676  - { id: 15, class: vgpr_32 }
677frameInfo:
678  isFrameAddressTaken: false
679  isReturnAddressTaken: false
680  hasStackMap:     false
681  hasPatchPoint:   false
682  stackSize:       0
683  offsetAdjustment: 0
684  maxAlignment:    0
685  adjustsStack:    false
686  hasCalls:        false
687  maxCallFrameSize: 0
688  hasOpaqueSPAdjustment: false
689  hasVAStart:      false
690  hasMustTailInVarArgFunc: false
# Body outline: %10 (sreg_128) is assembled by REG_SEQUENCE from two
# subregister copies of IMPLICIT_DEF values plus the constants -1 and 61440,
# and is the sreg_128 operand of every buffer load and store. %11 is a 4-byte
# load feeding the f32 add; %12 is a 2-byte load feeding the f16 add. The f32
# sum is stored as a dword, the f16 sum as a short.
691body:             |
692  bb.0 (%ir-block.0):
693    %4 = IMPLICIT_DEF
694    %5 = COPY %4.sub1
695    %6 = IMPLICIT_DEF
696    %7 = COPY %6.sub0
697    %8 = S_MOV_B32 61440
698    %9 = S_MOV_B32 -1
699    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
700    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
701    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
702    %13 = V_MOV_B32_e32 305413120, implicit $exec
703    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
704    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
705    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
706    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
707    S_ENDPGM
708
709...
710