1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze | FileCheck %s --check-prefix=AVX2
3; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze | FileCheck %s --check-prefix=SKL
4; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze | FileCheck %s --check-prefix=KNL
5; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze | FileCheck %s --check-prefix=SKX
6
7
8define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
9; AVX2-LABEL: 'test1'
10; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
11; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
12; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
13;
14; SKL-LABEL: 'test1'
15; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
16; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
17; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
18;
19; KNL-LABEL: 'test1'
20; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
21; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
22; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
23;
24; SKX-LABEL: 'test1'
25; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
26; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
27; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
28;
29  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
30  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
31  ret <2 x double> %res
32}
33
34define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
35; AVX2-LABEL: 'test2'
36; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
37; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
38; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
39;
40; SKL-LABEL: 'test2'
41; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
42; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
43; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
44;
45; KNL-LABEL: 'test2'
46; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
47; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
48; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
49;
50; SKX-LABEL: 'test2'
51; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
52; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
53; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
54;
55  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
56  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
57  ret <4 x i32> %res
58}
59
60define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
61; AVX2-LABEL: 'test3'
62; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
63; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
64; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
65;
66; SKL-LABEL: 'test3'
67; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
68; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
69; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
70;
71; KNL-LABEL: 'test3'
72; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
73; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
74; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
75;
76; SKX-LABEL: 'test3'
77; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
78; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
79; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
80;
81  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
82  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
83  ret void
84}
85
86define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
87; AVX2-LABEL: 'test4'
88; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
89; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
90; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
91;
92; SKL-LABEL: 'test4'
93; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
94; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
95; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
96;
97; KNL-LABEL: 'test4'
98; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
99; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
100; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
101;
102; SKX-LABEL: 'test4'
103; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
104; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
105; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
106;
107  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
108  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
109  ret <8 x float> %res
110}
111
112define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
113; AVX2-LABEL: 'test5'
114; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
115; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
116; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
117;
118; SKL-LABEL: 'test5'
119; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
120; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
121; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
122;
123; KNL-LABEL: 'test5'
124; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
125; KNL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
126; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
127;
128; SKX-LABEL: 'test5'
129; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
130; SKX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
131; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
132;
133  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
134  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
135  ret void
136}
137
138define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
139; AVX2-LABEL: 'test6'
140; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
141; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
142; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
143;
144; SKL-LABEL: 'test6'
145; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
146; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
147; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
148;
149; KNL-LABEL: 'test6'
150; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
151; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
152; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
153;
154; SKX-LABEL: 'test6'
155; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
156; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
157; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
158;
159  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
160  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
161  ret void
162}
163
164define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
165; AVX2-LABEL: 'test7'
166; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
167; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
168; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
169;
170; SKL-LABEL: 'test7'
171; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
172; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
173; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
174;
175; KNL-LABEL: 'test7'
176; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
177; KNL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
178; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
179;
180; SKX-LABEL: 'test7'
181; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
182; SKX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
183; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
184;
185  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
186  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
187  ret <2 x float> %res
188}
189
190define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
191; AVX2-LABEL: 'test8'
192; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
193; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
194; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
195;
196; SKL-LABEL: 'test8'
197; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
198; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
199; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
200;
201; KNL-LABEL: 'test8'
202; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
203; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
204; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
205;
206; SKX-LABEL: 'test8'
207; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
208; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
209; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
210;
211  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
212  %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
213  ret <2 x i32> %res
214}
215
216define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0)  {
217; AVX2-LABEL: 'test_gather_2f64'
218; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
219; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
220;
221; SKL-LABEL: 'test_gather_2f64'
222; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
223; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
224;
225; KNL-LABEL: 'test_gather_2f64'
226; KNL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
227; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
228;
229; SKX-LABEL: 'test_gather_2f64'
230; SKX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
231; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
232;
233
234
235
236
237%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
238  ret <2 x double> %res
239}
240declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
241
242define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {
243; AVX2-LABEL: 'test_gather_4i32'
244; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
245; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
246;
247; SKL-LABEL: 'test_gather_4i32'
248; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
249; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
250;
251; KNL-LABEL: 'test_gather_4i32'
252; KNL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
253; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
254;
255; SKX-LABEL: 'test_gather_4i32'
256; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
257; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
258;
259
260
261
262
263%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
264  ret <4 x i32> %res
265}
266
267define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0)  {
268; AVX2-LABEL: 'test_gather_4i32_const_mask'
269; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
270; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
271;
272; SKL-LABEL: 'test_gather_4i32_const_mask'
273; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
274; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
275;
276; KNL-LABEL: 'test_gather_4i32_const_mask'
277; KNL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
278; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
279;
280; SKX-LABEL: 'test_gather_4i32_const_mask'
281; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
282; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
283;
284
285
286
287
288%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
289  ret <4 x i32> %res
290}
291declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
292
293define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
294; AVX2-LABEL: 'test_gather_16f32_const_mask'
295; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
296; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
297; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
298; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
299;
300; SKL-LABEL: 'test_gather_16f32_const_mask'
301; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
302; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
303; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
304; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
305;
306; KNL-LABEL: 'test_gather_16f32_const_mask'
307; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
308; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
309; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
310; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
311;
312; SKX-LABEL: 'test_gather_16f32_const_mask'
313; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
314; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
315; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
316; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
317;
318  %sext_ind = sext <16 x i32> %ind to <16 x i64>
319  %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
320
321  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
322  ret <16 x float>%res
323}
324
325define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
326; AVX2-LABEL: 'test_gather_16f32_var_mask'
327; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
328; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
329; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
330; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
331;
332; SKL-LABEL: 'test_gather_16f32_var_mask'
333; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
334; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
335; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
336; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
337;
338; KNL-LABEL: 'test_gather_16f32_var_mask'
339; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
340; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
341; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
342; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
343;
344; SKX-LABEL: 'test_gather_16f32_var_mask'
345; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
346; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
347; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
348; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
349;
350  %sext_ind = sext <16 x i32> %ind to <16 x i64>
351  %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
352
353  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
354  ret <16 x float>%res
355}
356
357define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
358; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
359; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
360; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
361; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
362; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
363;
364; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
365; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
366; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
367; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
368; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
369;
370; KNL-LABEL: 'test_gather_16f32_ra_var_mask'
371; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
372; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
373; KNL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
374; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
375;
376; SKX-LABEL: 'test_gather_16f32_ra_var_mask'
377; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
378; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
379; SKX-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
380; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
381;
382  %sext_ind = sext <16 x i32> %ind to <16 x i64>
383  %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
384
385  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
386  ret <16 x float>%res
387}
388
389define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
390; AVX2-LABEL: 'test_gather_16f32_const_mask2'
391; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
392; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
393; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
394; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
395; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
396; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
397;
398; SKL-LABEL: 'test_gather_16f32_const_mask2'
399; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
400; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
401; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
402; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
403; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
404; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
405;
406; KNL-LABEL: 'test_gather_16f32_const_mask2'
407; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
408; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
409; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
410; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
411; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
412; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
413;
414; SKX-LABEL: 'test_gather_16f32_const_mask2'
415; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
416; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
417; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
418; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
419; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
420; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
421;
422  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
423  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
424
425  %sext_ind = sext <16 x i32> %ind to <16 x i64>
426  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
427
428  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
429  ret <16 x float>%res
430}
431
432define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
433; AVX2-LABEL: 'test_scatter_16i32'
434; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
435; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
436; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
437; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
438; AVX2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
439; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
440;
441; SKL-LABEL: 'test_scatter_16i32'
442; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
443; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
444; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
445; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
446; SKL-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
447; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
448;
449; KNL-LABEL: 'test_scatter_16i32'
450; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
451; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
452; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
453; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
454; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
455; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
456;
457; SKX-LABEL: 'test_scatter_16i32'
458; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
459; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
460; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
461; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
462; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
463; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
464;
465  %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
466  %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
467
468  %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
469  %imask = bitcast i16 %mask to <16 x i1>
470  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
471  ret void
472}
473
474define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
475; AVX2-LABEL: 'test_scatter_8i32'
476; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
477; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
478;
479; SKL-LABEL: 'test_scatter_8i32'
480; SKL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
481; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
482;
483; KNL-LABEL: 'test_scatter_8i32'
484; KNL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
485; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
486;
487; SKX-LABEL: 'test_scatter_8i32'
488; SKX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
489; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
490;
491
492  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
493  ret void
494}
495
496declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
497
498define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
499; AVX2-LABEL: 'test_scatter_4i32'
500; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
501; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
502;
503; SKL-LABEL: 'test_scatter_4i32'
504; SKL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
505; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
506;
507; KNL-LABEL: 'test_scatter_4i32'
508; KNL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
509; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
510;
511; SKX-LABEL: 'test_scatter_4i32'
512; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
513; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
514;
515  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
516  ret void
517}
518
519define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
520; AVX2-LABEL: 'test_gather_4f32'
521; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
522; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
523; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
524; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
525;
526; SKL-LABEL: 'test_gather_4f32'
527; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
528; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
529; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
530; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
531;
532; KNL-LABEL: 'test_gather_4f32'
533; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
534; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
535; KNL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
536; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
537;
538; SKX-LABEL: 'test_gather_4f32'
539; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
540; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
541; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
542; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
543;
544  %sext_ind = sext <4 x i32> %ind to <4 x i64>
545  %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
546
547  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
548  ret <4 x float>%res
549}
550
551define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
552; AVX2-LABEL: 'test_gather_4f32_const_mask'
553; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
554; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
555; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
556; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
557;
558; SKL-LABEL: 'test_gather_4f32_const_mask'
559; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
560; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
561; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
562; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
563;
564; KNL-LABEL: 'test_gather_4f32_const_mask'
565; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
566; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
567; KNL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
568; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
569;
570; SKX-LABEL: 'test_gather_4f32_const_mask'
571; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
572; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
573; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
574; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
575;
576  %sext_ind = sext <4 x i32> %ind to <4 x i64>
577  %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
578
579  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
580  ret <4 x float>%res
581}
582
583declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
584declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
585declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
586declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
587
588declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
589declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
590declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
591declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
592declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
593declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
594declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
595declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
596declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
597declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
598declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
599declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
600declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
601declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
602declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
603declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
604declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
605declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
606declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
607