1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
4
5define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
6  ; GFX9-LABEL: name: atomic_swap_1d
7  ; GFX9: bb.1.main_body:
8  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
9  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
18  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
19  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
20  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
21  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
22  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
23  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
24  ; GFX10NSA-LABEL: name: atomic_swap_1d
25  ; GFX10NSA: bb.1.main_body:
26  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
27  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
28  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
29  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
30  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
31  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
32  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
33  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
34  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
35  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
36  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
37  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
38  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
39  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
40  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
41  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
42main_body:
43  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
44  %out = bitcast i32 %v to float
45  ret float %out
46}
47
48define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
49  ; GFX9-LABEL: name: atomic_add_1d
50  ; GFX9: bb.1.main_body:
51  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
52  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
53  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
54  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
55  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
56  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
57  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
58  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
59  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
60  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
61  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
62  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
63  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
64  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
65  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
66  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
67  ; GFX10NSA-LABEL: name: atomic_add_1d
68  ; GFX10NSA: bb.1.main_body:
69  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
70  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
71  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
72  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
73  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
74  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
75  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
76  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
77  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
78  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
79  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
80  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
81  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
82  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
83  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
84  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
85main_body:
86  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
87  %out = bitcast i32 %v to float
88  ret float %out
89}
90
91define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
92  ; GFX9-LABEL: name: atomic_sub_1d
93  ; GFX9: bb.1.main_body:
94  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
95  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
96  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
97  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
98  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
99  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
100  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
101  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
102  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
103  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
104  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
105  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
106  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
107  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
108  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
109  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
110  ; GFX10NSA-LABEL: name: atomic_sub_1d
111  ; GFX10NSA: bb.1.main_body:
112  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
113  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
114  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
115  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
116  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
117  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
118  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
119  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
120  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
121  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
122  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
123  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
124  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
125  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
126  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
127  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
128main_body:
129  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
130  %out = bitcast i32 %v to float
131  ret float %out
132}
133
134define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
135  ; GFX9-LABEL: name: atomic_smin_1d
136  ; GFX9: bb.1.main_body:
137  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
138  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
139  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
140  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
141  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
142  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
143  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
144  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
145  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
146  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
147  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
148  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
149  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
150  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
151  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
152  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
153  ; GFX10NSA-LABEL: name: atomic_smin_1d
154  ; GFX10NSA: bb.1.main_body:
155  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
156  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
157  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
158  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
159  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
160  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
161  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
162  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
163  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
164  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
165  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
166  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
167  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
168  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
169  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
170  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
171main_body:
172  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
173  %out = bitcast i32 %v to float
174  ret float %out
175}
176
177
178define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
179  ; GFX9-LABEL: name: atomic_umin_1d
180  ; GFX9: bb.1.main_body:
181  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
182  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
183  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
184  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
185  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
186  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
187  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
188  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
189  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
190  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
191  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
192  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
193  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
194  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
195  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
196  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
197  ; GFX10NSA-LABEL: name: atomic_umin_1d
198  ; GFX10NSA: bb.1.main_body:
199  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
200  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
201  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
202  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
203  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
204  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
205  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
206  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
207  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
208  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
209  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
210  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
211  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
212  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
213  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
214  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
215main_body:
216  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
217  %out = bitcast i32 %v to float
218  ret float %out
219}
220
221define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
222  ; GFX9-LABEL: name: atomic_smax_1d
223  ; GFX9: bb.1.main_body:
224  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
225  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
226  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
227  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
228  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
229  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
230  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
231  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
232  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
233  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
234  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
235  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
236  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
237  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
238  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
239  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
240  ; GFX10NSA-LABEL: name: atomic_smax_1d
241  ; GFX10NSA: bb.1.main_body:
242  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
243  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
244  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
245  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
246  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
247  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
248  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
249  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
250  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
251  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
252  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
253  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
254  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
255  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
256  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
257  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
258main_body:
259  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
260  %out = bitcast i32 %v to float
261  ret float %out
262}
263
264define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
265  ; GFX9-LABEL: name: atomic_umax_1d
266  ; GFX9: bb.1.main_body:
267  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
268  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
269  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
270  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
271  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
272  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
273  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
274  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
275  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
276  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
277  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
278  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
279  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
280  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
281  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
282  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
283  ; GFX10NSA-LABEL: name: atomic_umax_1d
284  ; GFX10NSA: bb.1.main_body:
285  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
286  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
287  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
288  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
289  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
290  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
291  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
292  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
293  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
294  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
295  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
296  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
297  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
298  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
299  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
300  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
301main_body:
302  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
303  %out = bitcast i32 %v to float
304  ret float %out
305}
306
307define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
308  ; GFX9-LABEL: name: atomic_and_1d
309  ; GFX9: bb.1.main_body:
310  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
311  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
312  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
313  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
314  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
315  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
316  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
317  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
318  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
319  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
320  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
321  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
322  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
323  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
324  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
325  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
326  ; GFX10NSA-LABEL: name: atomic_and_1d
327  ; GFX10NSA: bb.1.main_body:
328  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
329  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
330  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
331  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
332  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
333  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
334  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
335  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
336  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
337  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
338  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
339  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
340  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
341  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
342  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
343  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
344main_body:
345  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
346  %out = bitcast i32 %v to float
347  ret float %out
348}
349
350define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
351  ; GFX9-LABEL: name: atomic_or_1d
352  ; GFX9: bb.1.main_body:
353  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
354  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
355  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
356  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
357  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
358  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
359  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
360  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
361  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
362  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
363  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
364  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
365  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
366  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
367  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
368  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
369  ; GFX10NSA-LABEL: name: atomic_or_1d
370  ; GFX10NSA: bb.1.main_body:
371  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
372  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
373  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
374  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
375  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
376  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
377  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
378  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
379  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
380  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
381  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
382  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
383  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
384  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
385  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
386  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
387main_body:
388  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
389  %out = bitcast i32 %v to float
390  ret float %out
391}
392
393define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
394  ; GFX9-LABEL: name: atomic_xor_1d
395  ; GFX9: bb.1.main_body:
396  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
397  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
398  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
399  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
400  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
401  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
402  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
403  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
404  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
405  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
406  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
407  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
408  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
409  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
410  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
411  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
412  ; GFX10NSA-LABEL: name: atomic_xor_1d
413  ; GFX10NSA: bb.1.main_body:
414  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
415  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
416  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
417  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
418  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
419  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
420  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
421  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
422  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
423  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
424  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
425  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
426  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
427  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
428  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
429  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
430main_body:
431  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
432  %out = bitcast i32 %v to float
433  ret float %out
434}
435
436define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
437  ; GFX9-LABEL: name: atomic_inc_1d
438  ; GFX9: bb.1.main_body:
439  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
440  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
441  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
442  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
443  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
444  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
445  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
446  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
447  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
448  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
449  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
450  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
451  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
452  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
453  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
454  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
455  ; GFX10NSA-LABEL: name: atomic_inc_1d
456  ; GFX10NSA: bb.1.main_body:
457  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
458  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
459  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
460  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
461  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
462  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
463  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
464  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
465  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
466  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
467  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
468  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
469  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
470  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
471  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
472  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
473main_body:
474  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
475  %out = bitcast i32 %v to float
476  ret float %out
477}
478
479define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
480  ; GFX9-LABEL: name: atomic_dec_1d
481  ; GFX9: bb.1.main_body:
482  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
483  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
484  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
485  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
486  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
487  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
488  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
489  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
490  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
491  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
492  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
493  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
494  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
495  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
496  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
497  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
498  ; GFX10NSA-LABEL: name: atomic_dec_1d
499  ; GFX10NSA: bb.1.main_body:
500  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
501  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
502  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
503  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
504  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
505  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
506  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
507  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
508  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
509  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
510  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
511  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
512  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
513  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
514  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
515  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
516main_body:
517  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
518  %out = bitcast i32 %v to float
519  ret float %out
520}
521
522define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
523  ; GFX9-LABEL: name: atomic_cmpswap_1d
524  ; GFX9: bb.1.main_body:
525  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
526  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
527  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
528  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
529  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
530  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
531  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
532  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
533  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
534  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
535  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
536  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
537  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
538  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
539  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
540  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
541  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
542  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
543  ; GFX10NSA-LABEL: name: atomic_cmpswap_1d
544  ; GFX10NSA: bb.1.main_body:
545  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
546  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
547  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
548  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
549  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
550  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
551  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
552  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
553  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
554  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
555  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
556  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
557  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
558  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
559  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
560  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
561  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
562  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
563main_body:
564  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
565  %out = bitcast i32 %v to float
566  ret float %out
567}
568
569define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
570  ; GFX9-LABEL: name: atomic_add_2d
571  ; GFX9: bb.1.main_body:
572  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
573  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
574  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
575  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
576  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
577  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
578  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
579  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
580  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
581  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
582  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
583  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
584  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
585  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
586  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
587  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
588  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
589  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
590  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
591  ; GFX10NSA-LABEL: name: atomic_add_2d
592  ; GFX10NSA: bb.1.main_body:
593  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
594  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
595  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
596  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
597  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
598  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
599  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
600  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
601  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
602  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
603  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
604  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
605  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
606  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
607  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
608  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
609  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
610  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
611  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
612main_body:
613  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
614  %out = bitcast i32 %v to float
615  ret float %out
616}
617
618define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
619  ; GFX9-LABEL: name: atomic_add_3d
620  ; GFX9: bb.1.main_body:
621  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
622  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
623  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
624  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
625  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
626  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
627  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
628  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
629  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
630  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
631  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
632  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
633  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
634  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
635  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
636  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
637  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
638  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
639  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
640  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
641  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
642  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
643  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
644  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
645  ; GFX10NSA-LABEL: name: atomic_add_3d
646  ; GFX10NSA: bb.1.main_body:
647  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
648  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
649  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
650  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
651  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
652  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
653  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
654  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
655  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
656  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
657  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
658  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
659  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
660  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
661  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
662  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
663  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
664  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
665  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
666  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
667  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
668  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
669  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
670  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
671main_body:
672  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
673  %out = bitcast i32 %v to float
674  ret float %out
675}
676
677define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
678  ; GFX9-LABEL: name: atomic_add_cube
679  ; GFX9: bb.1.main_body:
680  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
681  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
682  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
683  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
684  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
685  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
686  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
687  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
688  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
689  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
690  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
691  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
692  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
693  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
694  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
695  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
696  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
697  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
698  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
699  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
700  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
701  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
702  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
703  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
704  ; GFX10NSA-LABEL: name: atomic_add_cube
705  ; GFX10NSA: bb.1.main_body:
706  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
707  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
708  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
709  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
710  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
711  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
712  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
713  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
714  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
715  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
716  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
717  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
718  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
719  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
720  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
721  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
722  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
723  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
724  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
725  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
726  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
727  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
728  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
729  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
730main_body:
731  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
732  %out = bitcast i32 %v to float
733  ret float %out
734}
735
736define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
737  ; GFX9-LABEL: name: atomic_add_1darray
738  ; GFX9: bb.1.main_body:
739  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
740  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
741  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
742  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
743  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
744  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
745  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
746  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
747  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
748  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
749  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
750  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
751  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
752  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
753  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
754  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
755  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
756  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
757  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
758  ; GFX10NSA-LABEL: name: atomic_add_1darray
759  ; GFX10NSA: bb.1.main_body:
760  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
761  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
762  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
763  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
764  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
765  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
766  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
767  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
768  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
769  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
770  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
771  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
772  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
773  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
774  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
775  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
776  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
777  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
778  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
779main_body:
780  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
781  %out = bitcast i32 %v to float
782  ret float %out
783}
784
785define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
786  ; GFX9-LABEL: name: atomic_add_2darray
787  ; GFX9: bb.1.main_body:
788  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
789  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
790  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
791  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
792  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
793  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
794  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
795  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
796  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
797  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
798  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
799  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
800  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
801  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
802  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
803  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
804  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
805  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
806  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
807  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
808  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
809  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
810  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
811  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
812  ; GFX10NSA-LABEL: name: atomic_add_2darray
813  ; GFX10NSA: bb.1.main_body:
814  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
815  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
816  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
817  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
818  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
819  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
820  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
821  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
822  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
823  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
824  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
825  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
826  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
827  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
828  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
829  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
830  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
831  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
832  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
833  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
834  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
835  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
836  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
837  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
838main_body:
839  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
840  %out = bitcast i32 %v to float
841  ret float %out
842}
843
844define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
845  ; GFX9-LABEL: name: atomic_add_2dmsaa
846  ; GFX9: bb.1.main_body:
847  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
848  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
849  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
850  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
851  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
852  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
853  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
854  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
855  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
856  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
857  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
858  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
859  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
860  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
861  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
862  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
863  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
864  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
865  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
866  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
867  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
868  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
869  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
870  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
871  ; GFX10NSA-LABEL: name: atomic_add_2dmsaa
872  ; GFX10NSA: bb.1.main_body:
873  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
874  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
875  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
876  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
877  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
878  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
879  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
880  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
881  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
882  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
883  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
884  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
885  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
886  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
887  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
888  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
889  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
890  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
891  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
892  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
893  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
894  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
895  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
896  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
897main_body:
898  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
899  %out = bitcast i32 %v to float
900  ret float %out
901}
902
903define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
904  ; GFX9-LABEL: name: atomic_add_2darraymsaa
905  ; GFX9: bb.1.main_body:
906  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
907  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
908  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
909  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
910  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
911  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
912  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
913  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
914  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
915  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
916  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
917  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
918  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
919  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
920  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
921  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
922  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
923  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
924  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
925  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
926  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
927  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
928  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
929  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
930  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
931  ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa
932  ; GFX10NSA: bb.1.main_body:
933  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
934  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
935  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
936  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
937  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
938  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
939  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
940  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
941  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
942  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
943  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
944  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
945  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
946  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
947  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
948  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
949  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
950  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
951  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
952  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
953  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
954  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
955  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
956  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
957  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
958main_body:
959  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
960  %out = bitcast i32 %v to float
961  ret float %out
962}
963
964define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
965  ; GFX9-LABEL: name: atomic_add_1d_slc
966  ; GFX9: bb.1.main_body:
967  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
968  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
969  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
970  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
971  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
972  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
973  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
974  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
975  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
976  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
977  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
978  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
979  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
980  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
981  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
982  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
983  ; GFX10NSA-LABEL: name: atomic_add_1d_slc
984  ; GFX10NSA: bb.1.main_body:
985  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
986  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
987  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
988  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
989  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
990  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
991  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
992  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
993  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
994  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
995  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
996  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
997  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
998  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
999  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1000  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
1001main_body:
1002  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
1003  %out = bitcast i32 %v to float
1004  ret float %out
1005}
1006
1007define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t) {
1008  ; GFX9-LABEL: name: atomic_cmpswap_2d
1009  ; GFX9: bb.1.main_body:
1010  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1011  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1012  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1013  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1014  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1015  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1016  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1017  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1018  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1019  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1020  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1021  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1022  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1023  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1024  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1025  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
1026  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
1027  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
1028  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
1029  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1030  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
1031  ; GFX10NSA-LABEL: name: atomic_cmpswap_2d
1032  ; GFX10NSA: bb.1.main_body:
1033  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1034  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1035  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1036  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1037  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1038  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1039  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1040  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1041  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1042  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1043  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1044  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1045  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1046  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1047  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1048  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
1049  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
1050  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
1051  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
1052  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1053  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
1054main_body:
1055  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1056  %out = bitcast i32 %v to float
1057  ret float %out
1058}
1059
1060define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r) {
1061  ; GFX9-LABEL: name: atomic_cmpswap_3d
1062  ; GFX9: bb.1.main_body:
1063  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1064  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1065  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1066  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1067  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1068  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1069  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1070  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1071  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1072  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1073  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1074  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1075  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1076  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1077  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1078  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1079  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
1080  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
1081  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1082  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
1083  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1084  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
1085  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1086  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
1087  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1088  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
1089  ; GFX10NSA-LABEL: name: atomic_cmpswap_3d
1090  ; GFX10NSA: bb.1.main_body:
1091  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1092  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1093  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1094  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1095  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1096  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1097  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1098  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1099  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1100  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1101  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1102  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1103  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1104  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1105  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1106  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1107  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
1108  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
1109  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1110  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
1111  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1112  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
1113  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1114  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
1115  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1116  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
1117main_body:
1118  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1119  %out = bitcast i32 %v to float
1120  ret float %out
1121}
1122
1123define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1124  ; GFX9-LABEL: name: atomic_cmpswap_2darraymsaa
1125  ; GFX9: bb.1.main_body:
1126  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1127  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1128  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1129  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1130  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1131  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1132  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1133  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1134  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1135  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1136  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1137  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1138  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1139  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1140  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
1141  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1142  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1143  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
1144  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
1145  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1146  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
1147  ; GFX9:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
1148  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1149  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1150  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
1151  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1152  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
1153  ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa
1154  ; GFX10NSA: bb.1.main_body:
1155  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1156  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1157  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1158  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1159  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1160  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1161  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1162  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1163  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1164  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1165  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1166  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1167  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1168  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1169  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
1170  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1171  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1172  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
1173  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
1174  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1175  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
1176  ; GFX10NSA:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
1177  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1178  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1179  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
1180  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1181  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
1182main_body:
1183  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1184  %out = bitcast i32 %v to float
1185  ret float %out
1186}
1187
1188declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1189declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1190declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1191declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1192declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1193declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1194declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1195declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1196declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1197declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1198declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1199declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1200declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1201declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1202declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1203declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1204declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1205declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1206declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1207declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1208declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1209declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1210declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1211declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1212declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1213declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1214declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1215
1216attributes #0 = { nounwind }
1217