; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
; Image sample intrinsics with 16-bit (half) derivatives and 32-bit (float) coordinates.

define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
  ; GFX10-LABEL: name: sample_d_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
  ; GFX10-LABEL: name: sample_d_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
  ; GFX10-LABEL: name: sample_d_3d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
  ; GFX10:   [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
  ; GFX10-LABEL: name: sample_c_d_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
  ; GFX10-LABEL: name: sample_c_d_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
  ; GFX10-LABEL: name: sample_d_cl_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
  ; GFX10-LABEL: name: sample_d_cl_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
  ; GFX10-LABEL: name: sample_c_d_cl_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
  ; GFX10-LABEL: name: sample_c_d_cl_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
  ; GFX10-LABEL: name: sample_cd_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
  ; GFX10-LABEL: name: sample_cd_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
  ; GFX10-LABEL: name: sample_c_cd_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
  ; GFX10-LABEL: name: sample_c_cd_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
  ; GFX10-LABEL: name: sample_cd_cl_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
  ; GFX10-LABEL: name: sample_cd_cl_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
  ; GFX10-LABEL: name: sample_c_cd_cl_1d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
  ; GFX10-LABEL: name: sample_c_cd_cl_2d
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
  ; GFX10-LABEL: name: sample_c_d_o_2darray_V1
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GFX10:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

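; Two-channel (dmask = 6) variant of the same c.d.o 2D-array sample, returning <2 x float>.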
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
  ; GFX10-LABEL: name: sample_c_d_o_2darray_V2
  ; GFX10: bb.1.main_body:
  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
  ; GFX10:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
  ; GFX10:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
  ; GFX10:   [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10:   [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
  ; GFX10:   [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

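; Declarations for the sample intrinsic overloads used above (half-precision derivatives, f32 coordinates).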
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }