1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=PACKED %s
4
5define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
6  ; UNPACKED-LABEL: name: image_load_f16
7  ; UNPACKED: bb.1 (%ir-block.0):
8  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
9  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
18  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
19  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
20  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
21  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
22  ; UNPACKED:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
23  ; UNPACKED:   $vgpr0 = COPY [[ANYEXT]](s32)
24  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
25  ; PACKED-LABEL: name: image_load_f16
26  ; PACKED: bb.1 (%ir-block.0):
27  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
28  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
29  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
30  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
31  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
32  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
33  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
34  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
35  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
36  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
37  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
38  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
39  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
40  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
41  ; PACKED:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
42  ; PACKED:   $vgpr0 = COPY [[ANYEXT]](s32)
43  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
44  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
45  ret half %tex
46}
47
48define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
49  ; UNPACKED-LABEL: name: image_load_v2f16
50  ; UNPACKED: bb.1 (%ir-block.0):
51  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
52  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
53  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
54  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
55  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
56  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
57  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
58  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
59  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
60  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
61  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
62  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
63  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
64  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
65  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
66  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
67  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
68  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
69  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
70  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
71  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
72  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
73  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
74  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
75  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
76  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
77  ; PACKED-LABEL: name: image_load_v2f16
78  ; PACKED: bb.1 (%ir-block.0):
79  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
80  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
81  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
82  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
83  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
84  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
85  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
86  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
87  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
88  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
89  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
90  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
91  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
92  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
93  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
94  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
95  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
96  ret <2 x half> %tex
97}
98
99define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
100  ; UNPACKED-LABEL: name: image_load_v3f16
101  ; UNPACKED: bb.1 (%ir-block.0):
102  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
103  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
104  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
105  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
106  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
107  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
108  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
109  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
110  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
111  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
112  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
113  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
114  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
115  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
116  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
117  ; UNPACKED:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
118  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
119  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
120  ; UNPACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
121  ; UNPACKED:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
122  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
123  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
124  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
125  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
126  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
127  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
128  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
129  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
130  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
131  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
132  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
133  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
134  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
135  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
136  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
137  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
138  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
139  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
140  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
141  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
142  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
143  ; PACKED-LABEL: name: image_load_v3f16
144  ; PACKED: bb.1 (%ir-block.0):
145  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
146  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
147  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
148  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
149  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
150  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
151  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
152  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
153  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
154  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
155  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
156  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
157  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
158  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
159  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
160  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
161  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
162  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
163  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
164  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
165  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
166  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
167  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
168  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
169  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
170  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
171  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
172  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
173  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
174  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
175  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
176  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
177  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
178  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
179  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
180  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
181  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
182  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
183  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
184  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
185  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
186  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
187  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
188  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
189  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
190  ret <3 x half> %tex
191}
192
193define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
194  ; UNPACKED-LABEL: name: image_load_v4f16
195  ; UNPACKED: bb.1 (%ir-block.0):
196  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
197  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
198  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
199  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
200  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
201  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
202  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
203  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
204  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
205  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
206  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
207  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
208  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
209  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
210  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
211  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
212  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
213  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
214  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
215  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
216  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
217  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
218  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
219  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
220  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
221  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
222  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
223  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
224  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
225  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
226  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
227  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
228  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
229  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
230  ; PACKED-LABEL: name: image_load_v4f16
231  ; PACKED: bb.1 (%ir-block.0):
232  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
233  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
234  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
235  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
236  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
237  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
238  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
239  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
240  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
241  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
242  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
243  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
244  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
245  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
246  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
247  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
248  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
249  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
250  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
251  ret <4 x half> %tex
252}
253
254define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
255  ; UNPACKED-LABEL: name: image_load_tfe_f16
256  ; UNPACKED: bb.1 (%ir-block.0):
257  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
258  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
259  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
260  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
261  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
262  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
263  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
264  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
265  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
266  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
267  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
268  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
269  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
270  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
271  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
272  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
273  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
274  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
275  ; UNPACKED:   $vgpr0 = COPY [[COPY10]](s32)
276  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
277  ; PACKED-LABEL: name: image_load_tfe_f16
278  ; PACKED: bb.1 (%ir-block.0):
279  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
280  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
281  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
282  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
283  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
284  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
285  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
286  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
287  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
288  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
289  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
290  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
291  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
292  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
293  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
294  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
295  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
296  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
297  ; PACKED:   $vgpr0 = COPY [[COPY10]](s32)
298  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
299  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
300  %tex = extractvalue { half, i32 } %res, 0
301  %tfe = extractvalue { half, i32 } %res, 1
302  store i32 %tfe, i32 addrspace(1)* undef
303  ret half %tex
304}
305
306define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
307  ; UNPACKED-LABEL: name: image_load_tfe_v2f16
308  ; UNPACKED: bb.1 (%ir-block.0):
309  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
310  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
311  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
312  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
313  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
314  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
315  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
316  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
317  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
318  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
319  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
320  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
321  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
322  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
323  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
324  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
325  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
326  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
327  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
328  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
329  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
330  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
331  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
332  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
333  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
334  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
335  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
336  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
337  ; PACKED-LABEL: name: image_load_tfe_v2f16
338  ; PACKED: bb.1 (%ir-block.0):
339  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
340  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
341  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
342  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
343  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
344  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
345  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
346  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
347  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
348  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
349  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
350  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
351  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
352  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
353  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
354  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
355  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
356  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
357  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
358  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
359  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
360  %tex = extractvalue { <2 x half>, i32 } %res, 0
361  %tfe = extractvalue { <2 x half>, i32 } %res, 1
362  store i32 %tfe, i32 addrspace(1)* undef
363  ret <2 x half> %tex
364}
365
366define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
367  ; UNPACKED-LABEL: name: image_load_tfe_v3f16
368  ; UNPACKED: bb.1 (%ir-block.0):
369  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
370  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
371  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
372  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
373  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
374  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
375  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
376  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
377  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
378  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
379  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
380  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
381  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
382  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
383  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
384  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
385  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
386  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
387  ; UNPACKED:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
388  ; UNPACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
389  ; UNPACKED:   [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
390  ; UNPACKED:   [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
391  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
392  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
393  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
394  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
395  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
396  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
397  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
398  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
399  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
400  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
401  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
402  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
403  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
404  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
405  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
406  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
407  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
408  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
409  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
410  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
411  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
412  ; PACKED-LABEL: name: image_load_tfe_v3f16
413  ; PACKED: bb.1 (%ir-block.0):
414  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
415  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
416  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
417  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
418  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
419  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
420  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
421  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
422  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
423  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
424  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
425  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
426  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
427  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
428  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
429  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
430  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
431  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
432  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
433  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
434  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
435  ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
436  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
437  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
438  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
439  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
440  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
441  ; PACKED:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
442  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
443  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
444  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
445  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
446  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
447  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
448  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
449  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
450  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
451  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
452  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
453  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
454  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
455  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
456  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
457  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
458  ; PACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
459  ; PACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
460  ; PACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
461  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
462  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
463  %tex = extractvalue { <3 x half>, i32 } %res, 0
464  %tfe = extractvalue { <3 x half>, i32 } %res, 1
465  store i32 %tfe, i32 addrspace(1)* undef
466  ret <3 x half> %tex
467}
468
469define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
470  ; UNPACKED-LABEL: name: image_load_tfe_v4f16
471  ; UNPACKED: bb.1 (%ir-block.0):
472  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
473  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
474  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
475  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
476  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
477  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
478  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
479  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
480  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
481  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
482  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
483  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
484  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
485  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
486  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
487  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
488  ; UNPACKED:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
489  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
490  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
491  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
492  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
493  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
494  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
495  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
496  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
497  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
498  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
499  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
500  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
501  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
502  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
503  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
504  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
505  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
506  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
507  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
508  ; PACKED-LABEL: name: image_load_tfe_v4f16
509  ; PACKED: bb.1 (%ir-block.0):
510  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
511  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
512  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
513  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
514  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
515  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
516  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
517  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
518  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
519  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
520  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
521  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
522  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
523  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
524  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
525  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
526  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
527  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
528  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
529  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
530  ; PACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
531  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
532  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
533  %tex = extractvalue { <4 x half>, i32 } %res, 0
534  %tfe = extractvalue { <4 x half>, i32 } %res, 1
535  store i32 %tfe, i32 addrspace(1)* undef
536  ret <4 x half> %tex
537}
538
539define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
540  ; UNPACKED-LABEL: name: image_load_f16_dmask_0000
541  ; UNPACKED: bb.1 (%ir-block.0):
542  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
543  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
544  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
545  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
546  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
547  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
548  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
549  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
550  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
551  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
552  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
553  ; UNPACKED:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
554  ; UNPACKED:   $vgpr0 = COPY [[DEF]](s32)
555  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
556  ; PACKED-LABEL: name: image_load_f16_dmask_0000
557  ; PACKED: bb.1 (%ir-block.0):
558  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
559  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
560  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
561  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
562  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
563  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
564  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
565  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
566  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
567  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
568  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
569  ; PACKED:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
570  ; PACKED:   $vgpr0 = COPY [[DEF]](s32)
571  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
572  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
573  ret half %tex
574}
575
576define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
577  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000
578  ; UNPACKED: bb.1 (%ir-block.0):
579  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
580  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
581  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
582  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
583  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
584  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
585  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
586  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
587  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
588  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
589  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
590  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
591  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
592  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
593  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
594  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
595  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
596  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
597  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
598  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
599  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
600  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
601  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
602  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
603  ; PACKED-LABEL: name: image_load_v2f16_dmask_1000
604  ; PACKED: bb.1 (%ir-block.0):
605  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
606  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
607  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
608  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
609  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
610  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
611  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
612  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
613  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
614  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
615  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
616  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
617  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
618  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
619  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
620  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
621  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
622  ret <2 x half> %tex
623}
624
625define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
626  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000
627  ; UNPACKED: bb.1 (%ir-block.0):
628  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
629  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
630  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
631  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
632  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
633  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
634  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
635  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
636  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
637  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
638  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
639  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
640  ; UNPACKED:   $vgpr0 = COPY [[DEF]](<2 x s16>)
641  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
642  ; PACKED-LABEL: name: image_load_v2f16_dmask_0000
643  ; PACKED: bb.1 (%ir-block.0):
644  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
645  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
646  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
647  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
648  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
649  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
650  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
651  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
652  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
653  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
654  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
655  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
656  ; PACKED:   $vgpr0 = COPY [[DEF]](<2 x s16>)
657  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
658  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
659  ret <2 x half> %tex
660}
661
662define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
663  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100
664  ; UNPACKED: bb.1 (%ir-block.0):
665  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
666  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
667  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
668  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
669  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
670  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
671  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
672  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
673  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
674  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
675  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
676  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
677  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
678  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
679  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
680  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
681  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
682  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
683  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
684  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
685  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
686  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
687  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
688  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
689  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
690  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
691  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
692  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
693  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
694  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
695  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
696  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
697  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
698  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
699  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
700  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
701  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
702  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
703  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
704  ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
705  ; PACKED: bb.1 (%ir-block.0):
706  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
707  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
708  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
709  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
710  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
711  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
712  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
713  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
714  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
715  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
716  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
717  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
718  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
719  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
720  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
721  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
722  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
723  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
724  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
725  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
726  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
727  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
728  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
729  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
730  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
731  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
732  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
733  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
734  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
735  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
736  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
737  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
738  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
739  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
740  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
741  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
742  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
743  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
744  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
745  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
746  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
747  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
748  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
749  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
750  ret <3 x half> %tex
751}
752
753define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
754  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000
755  ; UNPACKED: bb.1 (%ir-block.0):
756  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
757  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
758  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
759  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
760  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
761  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
762  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
763  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
764  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
765  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
766  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
767  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
768  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
769  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
770  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
771  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
772  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
773  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
774  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
775  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
776  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
777  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
778  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
779  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
780  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
781  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
782  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
783  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
784  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
785  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
786  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
787  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
788  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
789  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
790  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
791  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
792  ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
793  ; PACKED: bb.1 (%ir-block.0):
794  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
795  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
796  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
797  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
798  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
799  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
800  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
801  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
802  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
803  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
804  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
805  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
806  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
807  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
808  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
809  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
810  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
811  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
812  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
813  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
814  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
815  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
816  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
817  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
818  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
819  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
820  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
821  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
822  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
823  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
824  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
825  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
826  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
827  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
828  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
829  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
830  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
831  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
832  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
833  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
834  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
835  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
836  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
837  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
838  ret <3 x half> %tex
839}
840
841define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
842  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000
843  ; UNPACKED: bb.1 (%ir-block.0):
844  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
845  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
846  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
847  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
848  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
849  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
850  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
851  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
852  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
853  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
854  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
855  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
856  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
857  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
858  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
859  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
860  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
861  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
862  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
863  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
864  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
865  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
866  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
867  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
868  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
869  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
870  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
871  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
872  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
873  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
874  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
875  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
876  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
877  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
878  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
879  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
880  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
881  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
882  ; UNPACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
883  ; UNPACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
884  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
885  ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
886  ; PACKED: bb.1 (%ir-block.0):
887  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
888  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
889  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
890  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
891  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
892  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
893  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
894  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
895  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
896  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
897  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
898  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
899  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
900  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
901  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
902  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
903  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
904  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
905  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
906  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
907  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
908  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
909  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
910  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
911  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
912  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
913  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
914  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
915  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
916  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
917  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
918  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
919  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
920  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
921  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
922  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
923  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
924  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
925  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
926  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
927  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
928  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
929  ret <3 x half> %tex
930}
931
932define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
933  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110
934  ; UNPACKED: bb.1 (%ir-block.0):
935  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
936  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
937  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
938  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
939  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
940  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
941  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
942  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
943  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
944  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
945  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
946  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
947  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
948  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
949  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
950  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
951  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
952  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
953  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
954  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
955  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
956  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
957  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
958  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
959  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
960  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
961  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
962  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
963  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
964  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
965  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
966  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
967  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
968  ; PACKED-LABEL: name: image_load_v4f16_dmask_1110
969  ; PACKED: bb.1 (%ir-block.0):
970  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
971  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
972  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
973  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
974  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
975  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
976  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
977  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
978  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
979  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
980  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
981  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
982  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
983  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
984  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
985  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
986  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
987  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
988  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
989  ret <4 x half> %tex
990}
991
992define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
993  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
994  ; UNPACKED: bb.1 (%ir-block.0):
995  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
996  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
997  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
998  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
999  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1000  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1001  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1002  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1003  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1004  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1005  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1006  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1007  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1008  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1009  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1010  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1011  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1012  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1013  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1014  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
1015  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1016  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1017  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1018  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1019  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1020  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1021  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1022  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1023  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1024  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1025  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1026  ; PACKED-LABEL: name: image_load_v4f16_dmask_1100
1027  ; PACKED: bb.1 (%ir-block.0):
1028  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1029  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1030  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1031  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1032  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1033  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1034  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1035  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1036  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1037  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1038  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1039  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1040  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1041  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1042  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1043  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
1044  ; PACKED:   $vgpr1 = COPY [[DEF]](<2 x s16>)
1045  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1046  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1047  ret <4 x half> %tex
1048}
1049
1050define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1051  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000
1052  ; UNPACKED: bb.1 (%ir-block.0):
1053  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1054  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1055  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1056  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1057  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1058  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1059  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1060  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1061  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1062  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1063  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1064  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1065  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1066  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1067  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1068  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1069  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1070  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1071  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1072  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1073  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1074  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1075  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1076  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1077  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1078  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1079  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1080  ; PACKED-LABEL: name: image_load_v4f16_dmask_1000
1081  ; PACKED: bb.1 (%ir-block.0):
1082  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1083  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1084  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1085  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1086  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1087  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1088  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1089  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1090  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1091  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1092  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1093  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1094  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1095  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1096  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1097  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
1098  ; PACKED:   $vgpr1 = COPY [[DEF]](<2 x s16>)
1099  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1100  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1101  ret <4 x half> %tex
1102}
1103
1104define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1105  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000
1106  ; UNPACKED: bb.1 (%ir-block.0):
1107  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1108  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1109  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1110  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1111  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1112  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1113  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1114  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1115  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1116  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1117  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1118  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1119  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
1120  ; UNPACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
1121  ; UNPACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
1122  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1123  ; PACKED-LABEL: name: image_load_v4f16_dmask_0000
1124  ; PACKED: bb.1 (%ir-block.0):
1125  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1126  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1127  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1128  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1129  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1130  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1131  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1132  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1133  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1134  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1135  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1136  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1137  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
1138  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
1139  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
1140  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1141  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1142  ret <4 x half> %tex
1143}
1144
1145define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1146  ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000
1147  ; UNPACKED: bb.1 (%ir-block.0):
1148  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1149  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1150  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1151  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1152  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1153  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1154  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1155  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1156  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1157  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1158  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1159  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1160  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1161  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1162  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1163  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1164  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1165  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1166  ; UNPACKED:   $vgpr0 = COPY [[COPY10]](s32)
1167  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1168  ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000
1169  ; PACKED: bb.1 (%ir-block.0):
1170  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1171  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1172  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1173  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1174  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1175  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1176  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1177  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1178  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1179  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1180  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1181  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1182  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1183  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1184  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1185  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1186  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1187  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1188  ; PACKED:   $vgpr0 = COPY [[COPY10]](s32)
1189  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1190  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1191  %tex = extractvalue { half, i32 } %res, 0
1192  %tfe = extractvalue { half, i32 } %res, 1
1193  store i32 %tfe, i32 addrspace(1)* undef
1194  ret half %tex
1195}
1196
1197define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1198  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1199  ; UNPACKED: bb.1 (%ir-block.0):
1200  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1201  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1202  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1203  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1204  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1205  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1206  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1207  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1208  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1209  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1210  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1211  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1212  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1213  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1214  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1215  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1216  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1217  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1218  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1219  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1220  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1221  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1222  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1223  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1224  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1225  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1226  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1227  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1228  ; PACKED: bb.1 (%ir-block.0):
1229  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1230  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1231  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1232  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1233  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1234  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1235  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1236  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1237  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1238  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1239  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1240  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1241  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1242  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1243  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1244  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1245  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1246  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1247  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1248  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1249  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1250  %tex = extractvalue { <2 x half>, i32 } %res, 0
1251  %tfe = extractvalue { <2 x half>, i32 } %res, 1
1252  store i32 %tfe, i32 addrspace(1)* undef
1253  ret <2 x half> %tex
1254}
1255
1256define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1257  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1258  ; UNPACKED: bb.1 (%ir-block.0):
1259  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1260  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1261  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1262  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1263  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1264  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1265  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1266  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1267  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1268  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1269  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1270  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1271  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1272  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1273  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1274  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1275  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1276  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1277  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1278  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1279  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1280  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1281  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1282  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1283  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1284  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1285  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1286  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1287  ; PACKED: bb.1 (%ir-block.0):
1288  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1289  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1290  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1291  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1292  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1293  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1294  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1295  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1296  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1297  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1298  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1299  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1300  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1301  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1302  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1303  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1304  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1305  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1306  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1307  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1308  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1309  %tex = extractvalue { <2 x half>, i32 } %res, 0
1310  %tfe = extractvalue { <2 x half>, i32 } %res, 1
1311  store i32 %tfe, i32 addrspace(1)* undef
1312  ret <2 x half> %tex
1313}
1314
1315define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1316  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1317  ; UNPACKED: bb.1 (%ir-block.0):
1318  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1319  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1320  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1321  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1322  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1323  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1324  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1325  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1326  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1327  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1328  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1329  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1330  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1331  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1332  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1333  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1334  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1335  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1336  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1337  ; UNPACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1338  ; UNPACKED:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1339  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
1340  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1341  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1342  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1343  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1344  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
1345  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1346  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
1347  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1348  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1349  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1350  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1351  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
1352  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
1353  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
1354  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1355  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1356  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
1357  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
1358  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1359  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1360  ; PACKED: bb.1 (%ir-block.0):
1361  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1362  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1363  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1364  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1365  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1366  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1367  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1368  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1369  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1370  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1371  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1372  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1373  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1374  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1375  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1376  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1377  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1378  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1379  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1380  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1381  ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1382  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
1383  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1384  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
1385  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
1386  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1387  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1388  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1389  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
1390  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1391  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
1392  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
1393  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1394  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
1395  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1396  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1397  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1398  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
1399  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
1400  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
1401  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
1402  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
1403  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1404  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1405  ; PACKED:   $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
1406  ; PACKED:   $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
1407  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1408  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1409  %tex = extractvalue { <3 x half>, i32 } %res, 0
1410  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1411  store i32 %tfe, i32 addrspace(1)* undef
1412  ret <3 x half> %tex
1413}
1414
1415define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1416  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1417  ; UNPACKED: bb.1 (%ir-block.0):
1418  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1419  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1420  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1421  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1422  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1423  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1424  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1425  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1426  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1427  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1428  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1429  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1430  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1431  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1432  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1433  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1434  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1435  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1436  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1437  ; UNPACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1438  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1439  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1440  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1441  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1442  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1443  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1444  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
1445  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1446  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
1447  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1448  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1449  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
1450  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
1451  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1452  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1453  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1454  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
1455  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
1456  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1457  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1458  ; PACKED: bb.1 (%ir-block.0):
1459  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1460  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1461  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1462  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1463  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1464  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1465  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1466  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1467  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1468  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1469  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1470  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1471  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1472  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1473  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1474  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1475  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1476  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1477  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1478  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1479  ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1480  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
1481  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1482  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
1483  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
1484  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1485  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1486  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1487  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
1488  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1489  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
1490  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
1491  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1492  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
1493  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1494  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1495  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1496  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
1497  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
1498  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
1499  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
1500  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
1501  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1502  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1503  ; PACKED:   $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
1504  ; PACKED:   $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
1505  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1506  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1507  %tex = extractvalue { <3 x half>, i32 } %res, 0
1508  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1509  store i32 %tfe, i32 addrspace(1)* undef
1510  ret <3 x half> %tex
1511}
1512
1513define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1514  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1515  ; UNPACKED: bb.1 (%ir-block.0):
1516  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1517  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1518  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1519  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1520  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1521  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1522  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1523  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1524  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1525  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1526  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1527  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1528  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1529  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1530  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1531  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1532  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1533  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1534  ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1535  ; UNPACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1536  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1537  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1538  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1539  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1540  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1541  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1542  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
1543  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1544  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
1545  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1546  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1547  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
1548  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
1549  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1550  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1551  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1552  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
1553  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
1554  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1555  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1556  ; PACKED: bb.1 (%ir-block.0):
1557  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1558  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1559  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1560  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1561  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1562  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1563  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1564  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1565  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1566  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1567  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1568  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1569  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1570  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1571  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1572  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1573  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1574  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1575  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1576  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1577  ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1578  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
1579  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1580  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
1581  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
1582  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1583  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1584  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1585  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
1586  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1587  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
1588  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
1589  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1590  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
1591  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1592  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1593  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1594  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
1595  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
1596  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
1597  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
1598  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
1599  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1600  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1601  ; PACKED:   $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
1602  ; PACKED:   $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
1603  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1604  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1605  %tex = extractvalue { <3 x half>, i32 } %res, 0
1606  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1607  store i32 %tfe, i32 addrspace(1)* undef
1608  ret <3 x half> %tex
1609}
1610
1611define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1612  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1613  ; UNPACKED: bb.1 (%ir-block.0):
1614  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1615  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1616  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1617  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1618  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1619  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1620  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1621  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1622  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1623  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1624  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1625  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1626  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1627  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1628  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
1629  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
1630  ; UNPACKED:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1631  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1632  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1633  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1634  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1635  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
1636  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1637  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1638  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1639  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1640  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
1641  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
1642  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1643  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1644  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1645  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1646  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1647  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1648  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1649  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1650  ; PACKED: bb.1 (%ir-block.0):
1651  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1652  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1653  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1654  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1655  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1656  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1657  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1658  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1659  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1660  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1661  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1662  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1663  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1664  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1665  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
1666  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1667  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1668  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
1669  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1670  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1671  ; PACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1672  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1673  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1674  %tex = extractvalue { <4 x half>, i32 } %res, 0
1675  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1676  store i32 %tfe, i32 addrspace(1)* undef
1677  ret <4 x half> %tex
1678}
1679
1680define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1681  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1682  ; UNPACKED: bb.1 (%ir-block.0):
1683  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1684  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1685  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1686  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1687  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1688  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1689  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1690  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1691  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1692  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1693  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1694  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1695  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1696  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1697  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1698  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1699  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1700  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1701  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1702  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1703  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1704  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
1705  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1706  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1707  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1708  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1709  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1710  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1711  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1712  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1713  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1714  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1715  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1716  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1717  ; PACKED: bb.1 (%ir-block.0):
1718  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1719  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1720  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1721  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1722  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1723  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1724  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1725  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1726  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1727  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1728  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1729  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1730  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1731  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1732  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1733  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1734  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1735  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1736  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1737  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1738  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1739  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1740  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1741  %tex = extractvalue { <4 x half>, i32 } %res, 0
1742  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1743  store i32 %tfe, i32 addrspace(1)* undef
1744  ret <4 x half> %tex
1745}
1746
1747define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1748  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1749  ; UNPACKED: bb.1 (%ir-block.0):
1750  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1751  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1752  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1753  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1754  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1755  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1756  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1757  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1758  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1759  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1760  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1761  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1762  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1763  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1764  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1765  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1766  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1767  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1768  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1769  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1770  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1771  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1772  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1773  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1774  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1775  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1776  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1777  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1778  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1779  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1780  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1781  ; PACKED: bb.1 (%ir-block.0):
1782  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1783  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1784  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1785  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1786  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1787  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1788  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1789  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1790  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1791  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1792  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1793  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1794  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1795  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1796  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1797  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1798  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1799  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1800  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1801  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1802  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1803  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1804  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1805  %tex = extractvalue { <4 x half>, i32 } %res, 0
1806  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1807  store i32 %tfe, i32 addrspace(1)* undef
1808  ret <4 x half> %tex
1809}
1810
1811define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1812  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
1813  ; UNPACKED: bb.1 (%ir-block.0):
1814  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1815  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1816  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1817  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1818  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1819  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1820  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1821  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1822  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1823  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1824  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1825  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1826  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1827  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1828  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1829  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1830  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1831  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1832  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1833  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1834  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1835  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1836  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1837  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1838  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1839  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1840  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1841  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1842  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1843  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1844  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
1845  ; PACKED: bb.1 (%ir-block.0):
1846  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1847  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1848  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1849  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1850  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1851  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1852  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1853  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1854  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1855  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1856  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1857  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1858  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1859  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1860  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1861  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1862  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1863  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1864  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1865  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1866  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1867  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1868  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1869  %tex = extractvalue { <4 x half>, i32 } %res, 0
1870  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1871  store i32 %tfe, i32 addrspace(1)* undef
1872  ret <4 x half> %tex
1873}
1874
1875declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1876declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1877declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1878declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1879declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1880declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1881declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1882declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1883
1884attributes #0 = { nounwind readonly }
1885