; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX81 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s

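; Legalizer tests for the d16 (16-bit data) variants of
; llvm.amdgcn.image.store.2d. The prefixes reflect how each subtarget expects
; 16-bit image store data, as seen in the checks below: UNPACKED (tonga) wants
; one component per 32-bit register, GFX9/GFX10 store the packed <2 x s16>
; data directly, and GFX81 (gfx810) keeps the data packed but pads it out to
; extra 32-bit registers, presumably to work around that target's d16 image
; store quirk.

; A single f16 component is simply truncated to s16 and passed through
; unchanged on every subtarget.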
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
  ; PACKED-LABEL: name: image_store_f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; PACKED:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
  ; PACKED:   S_ENDPGM 0
  ; UNPACKED-LABEL: name: image_store_f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; UNPACKED:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
  ; UNPACKED:   S_ENDPGM 0
  ; GFX81-LABEL: name: image_store_f16
  ; GFX81: bb.1 (%ir-block.0):
  ; GFX81:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX81:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX81:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX81:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX81:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX81:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX81:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX81:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX81:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX81:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX81:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX81:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX81:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; GFX81:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX81:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX81:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
  ; GFX81:   S_ENDPGM 0
  ; GFX9-LABEL: name: image_store_f16
  ; GFX9: bb.1 (%ir-block.0):
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
  ; GFX9:   S_ENDPGM 0
  ; GFX10-LABEL: name: image_store_f16
  ; GFX10: bb.1 (%ir-block.0):
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
  ; GFX10:   S_ENDPGM 0
  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

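; <2 x half> data: GFX9 and GFX10 pass the packed <2 x s16> register straight
; through. UNPACKED shifts the two components apart into a <2 x s32>, and
; GFX81 keeps the packed dword but appends an undef second dword.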
define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) {
  ; PACKED-LABEL: name: image_store_v2f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
  ; PACKED:   S_ENDPGM 0
  ; UNPACKED-LABEL: name: image_store_v2f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; UNPACKED:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY11]](s32), [[COPY12]](s32)
  ; UNPACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
  ; UNPACKED:   S_ENDPGM 0
  ; GFX81-LABEL: name: image_store_v2f16
  ; GFX81: bb.1 (%ir-block.0):
  ; GFX81:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX81:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX81:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX81:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX81:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX81:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX81:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX81:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX81:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX81:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX81:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX81:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX81:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX81:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX81:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; GFX81:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX81:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32)
  ; GFX81:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
  ; GFX81:   S_ENDPGM 0
  ; GFX9-LABEL: name: image_store_v2f16
  ; GFX9: bb.1 (%ir-block.0):
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
  ; GFX9:   S_ENDPGM 0
  ; GFX10-LABEL: name: image_store_v2f16
  ; GFX10: bb.1 (%ir-block.0):
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
  ; GFX10:   S_ENDPGM 0
  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

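; <3 x half> data arrives in two VGPRs, so every subtarget first pads it with
; undef and splits it into 32-bit pieces. UNPACKED then stores a <3 x s32> of
; the individual components, GFX81 repacks the halves into a <3 x s32>, and
; GFX9/GFX10 rebuild a <3 x s16> via G_BUILD_VECTOR_TRUNC.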
define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
  ; PACKED-LABEL: name: image_store_v3f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; PACKED:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
  ; PACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
  ; PACKED:   [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
  ; PACKED:   S_ENDPGM 0
  ; UNPACKED-LABEL: name: image_store_v3f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
  ; UNPACKED:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
  ; UNPACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
  ; UNPACKED:   S_ENDPGM 0
  ; GFX81-LABEL: name: image_store_v3f16
  ; GFX81: bb.1 (%ir-block.0):
  ; GFX81:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX81:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX81:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX81:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX81:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX81:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX81:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX81:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX81:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX81:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX81:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX81:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX81:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; GFX81:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX81:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; GFX81:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
  ; GFX81:   [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
  ; GFX81:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
  ; GFX81:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX81:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
  ; GFX81:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
  ; GFX81:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX81:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; GFX81:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; GFX81:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
  ; GFX81:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX81:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
  ; GFX81:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; GFX81:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; GFX81:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; GFX81:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
  ; GFX81:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
  ; GFX81:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX81:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
  ; GFX81:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; GFX81:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; GFX81:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
  ; GFX81:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
  ; GFX81:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
  ; GFX81:   [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
  ; GFX81:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
  ; GFX81:   S_ENDPGM 0
  ; GFX9-LABEL: name: image_store_v3f16
  ; GFX9: bb.1 (%ir-block.0):
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
  ; GFX9:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF1]](s32)
  ; GFX9:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>)
  ; GFX9:   [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
  ; GFX9:   S_ENDPGM 0
  ; GFX10-LABEL: name: image_store_v3f16
  ; GFX10: bb.1 (%ir-block.0):
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX10:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
  ; GFX10:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
  ; GFX10:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
  ; GFX10:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF1]](s32)
  ; GFX10:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>)
  ; GFX10:   [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
  ; GFX10:   S_ENDPGM 0
  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

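; <4 x half> data: GFX9 and GFX10 store the concatenated <4 x s16> directly,
; UNPACKED expands it to one component per element of a <4 x s32>, and GFX81
; bitcasts the packed pairs to <2 x s32> and pads them to <4 x s32> with
; undefs.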
define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
  ; PACKED-LABEL: name: image_store_v4f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; PACKED:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; PACKED:   S_ENDPGM 0
  ; UNPACKED-LABEL: name: image_store_v4f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>)
  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
  ; UNPACKED:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; UNPACKED:   [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
  ; UNPACKED:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; UNPACKED:   S_ENDPGM 0
  ; GFX81-LABEL: name: image_store_v4f16
  ; GFX81: bb.1 (%ir-block.0):
  ; GFX81:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX81:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX81:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX81:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX81:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX81:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX81:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX81:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX81:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX81:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX81:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX81:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX81:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; GFX81:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX81:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
  ; GFX81:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX81:   [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>)
  ; GFX81:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
  ; GFX81:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX81:   [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32)
  ; GFX81:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; GFX81:   S_ENDPGM 0
  ; GFX9-LABEL: name: image_store_v4f16
  ; GFX9: bb.1 (%ir-block.0):
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; GFX9:   S_ENDPGM 0
  ; GFX10-LABEL: name: image_store_v4f16
  ; GFX10: bb.1 (%ir-block.0):
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX10:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; GFX10:   S_ENDPGM 0
  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind writeonly }