1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
3; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
4; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
5; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s
6
; Legacy (typed-pointer, non-mangled) masked gather intrinsic: operand 2 is the
; alignment, operand 3 the per-lane mask, operand 4 the passthru value used for
; masked-off lanes.
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

; Two-lane i32 gather through a loaded vector of pointers. With AVX2 this
; selects a hardware gather (vpgatherdd on X86, vpgatherqd on X64); the
; NOGATHER run scalarizes it into a movmsk of the sign-extended mask plus one
; branch-guarded vpinsrd load per lane.
define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB0_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB0_3
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB0_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  .LBB0_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}
57
; Same two-lane i32 gather, but the result is widened to <4 x i32> by a
; shufflevector whose lanes 2-3 come from the undef operand. The generated
; code is expected to match the plain v2i32 case (no extra widening work).
define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB1_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB1_3
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB1_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  .LBB1_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}
107
; Float flavor of the two-lane gather intrinsic (legacy non-mangled name).
declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

; Two-lane float gather: AVX2 selects vgatherdps/vgatherqps; NOGATHER
; scalarizes into per-lane vmovss/vinsertps loads guarded by mask-bit tests.
define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB2_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB2_3
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB2_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  .LBB2_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}
159
; Two-lane float gather widened to <4 x float> via shufflevector (lanes 2-3
; read the undef operand); codegen should match the non-concat float case.
define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB3_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB3_3
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB3_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  .LBB3_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}
210
211
; Four-lane i32 gather intrinsic (legacy non-mangled name).
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

; Pointers are passed directly in vector registers here (not loaded from
; memory). On X64 the four 64-bit pointers occupy a ymm, so vpgatherqd is
; used and vzeroupper is emitted; NOGATHER extracts the high xmm half for
; lanes 2-3 and does four guarded scalar loads.
define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB4_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB4_7
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB4_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  .LBB4_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}
271
; Four-lane float gather intrinsic (legacy non-mangled name).
declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

; Float counterpart of masked_gather_v4i32: vgatherdps/vgatherqps with AVX2,
; four guarded vmovss/vinsertps scalar loads without it.
define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB5_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB5_7
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB5_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  .LBB5_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}
332
; Eight-lane i32 gather intrinsic (legacy non-mangled name).
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

; Eight-lane i32 gather through a loaded pointer vector. On X86 (32-bit
; pointers) a single vpgatherdd handles all lanes; on X64 the eight 64-bit
; pointers need two ymm loads and the gather is split into two vpgatherqd.
; NOGATHER expands to eight guarded scalar loads, using a
; vpsllw/vpacksswb/vpmovmskb sequence to materialize the i1 mask bits.
define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm1, %xmm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $3, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB6_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB6_15
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB6_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  .LBB6_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x i32*>, <8 x i32*>* %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}
439
; Eight-lane float gather intrinsic (legacy non-mangled name).
declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

; Float counterpart of masked_gather_v8i32: one vgatherdps on X86, two
; vgatherqps halves on X64, and eight guarded vmovss/vinsertps scalar loads
; on NOGATHER.
define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB7_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB7_15
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB7_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  .LBB7_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x float*>, <8 x float*>* %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}
547
; Four-lane i64 gather intrinsic (legacy non-mangled name).
declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

; 64-bit element gather: X86 (32-bit pointers) uses vpgatherdq with an xmm
; index, X64 uses vpgatherqq with a ymm of 64-bit pointers; NOGATHER does
; four guarded vpinsrq loads, handling the upper 128-bit half via
; vextractf128/vinsertf128.
define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB8_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB8_7
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB8_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  .LBB8_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x i64*>, <4 x i64*>* %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}
617
; Four-lane double gather intrinsic (legacy non-mangled name).
declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

; Double counterpart of masked_gather_v4i64: vgatherdpd/vgatherqpd with AVX2,
; four guarded vmovsd/vmovlps/vmovhps scalar loads without it.
define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB9_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB9_7
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB9_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  .LBB9_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x double*>, <4 x double*>* %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}
687
; Two-lane i64 gather intrinsic (legacy non-mangled name).
declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)

; Two-lane i64 gather: vpgatherdq (X86) / vpgatherqq (X64) with AVX2; two
; guarded vpinsrq loads on NOGATHER.
define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB10_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB10_3
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB10_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  .LBB10_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i64*>, <2 x i64*>* %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}
736
; Two-lane double gather intrinsic (legacy non-mangled name).
declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)

; Two-lane double gather: vgatherdpd (X86) / vgatherqpd (X64) with AVX2; two
; guarded vmovlps/vmovhps loads on NOGATHER.
define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB11_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB11_3
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB11_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  .LBB11_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}
785
786
; All-zero mask: the gather must fold away entirely and return the passthru
; value — every run line reduces to a single register move from %passthru.
define <2 x double> @masked_gather_zeromask(<2 x double*>* %ptr, <2 x double> %dummy, <2 x double> %passthru) {
; X86-LABEL: masked_gather_zeromask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_zeromask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_zeromask:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}
807