1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X86-AVX
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512VL,X86-AVX512VL
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X64-AVX
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512VL,X64-AVX512VL
6
7; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
8
; Calls llvm.x86.avx.sqrt.pd.256 on %a0 and returns the result. Both feature
; sets are expected to emit a single vsqrtpd on ymm0 with identical bytes; the
; AVX512VL output additionally carries the EVEX-to-VEX compression note.
9define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
10; AVX-LABEL: test_x86_avx_sqrt_pd_256:
11; AVX:       # %bb.0:
12; AVX-NEXT:    vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
13; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
14;
15; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
16; AVX512VL:       # %bb.0:
17; AVX512VL-NEXT:    vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
18; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
19  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
20  ret <4 x double> %res
21}
22declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
23
; Single-precision counterpart of the sqrt test: calls llvm.x86.avx.sqrt.ps.256
; on %a0. Both feature sets are expected to emit one vsqrtps on ymm0 with the
; same four encoding bytes.
24define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
25; AVX-LABEL: test_x86_avx_sqrt_ps_256:
26; AVX:       # %bb.0:
27; AVX-NEXT:    vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
28; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
29;
30; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
31; AVX512VL:       # %bb.0:
32; AVX512VL-NEXT:    vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
33; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
34  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
35  ret <8 x float> %res
36}
37declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
38
; Inserts the <2 x double> %a1 into %a0 with immediate 1 through
; llvm.x86.avx.vinsertf128.pd.256. Both feature sets are expected to emit
; vinsertf128 $1 from xmm1 into ymm0.
39define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
40; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
41; AVX:       # %bb.0:
42; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
43; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
44;
45; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
46; AVX512VL:       # %bb.0:
47; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
48; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
49  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
50  ret <4 x double> %res
51}
52declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
53
; Float-vector form of the 128-bit insert test: llvm.x86.avx.vinsertf128.ps.256
; with immediate 1. The expected instruction and bytes match the pd variant
; (vinsertf128 $1, xmm1 into ymm0).
54define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
55; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
56; AVX:       # %bb.0:
57; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
58; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
59;
60; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
61; AVX512VL:       # %bb.0:
62; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
63; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
64  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
65  ret <8 x float> %res
66}
67declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
68
; Integer-vector form of the 128-bit insert test: llvm.x86.avx.vinsertf128.si.256
; with immediate 1. Even for <8 x i32> operands the expected instruction is
; still vinsertf128 $1 with the same bytes as the pd/ps variants.
69define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
70; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
71; AVX:       # %bb.0:
72; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
73; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
74;
75; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
76; AVX512VL:       # %bb.0:
77; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
78; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
79  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
80  ret <8 x i32> %res
81}
82
83; Verify that high bits of the immediate are masked off. This should be the equivalent
84; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
85; not a vinsertf128 $1.
; Passes immediate 2 (only a bit above bit 0 set). All four runs share one set
; of expectations: a vblendps $240 whose shuffle comment is
; ymm1[0,1,2,3],ymm0[4,5,6,7], i.e. xmm1 lands in the low half and the high
; half of %a0 is kept, which is what an insert at index 0 would produce.
86define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
87; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
88; CHECK:       # %bb.0:
89; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
90; CHECK-NEXT:    vblendps $240, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xf0]
91; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
92; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
93  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
94  ret <8 x i32> %res
95}
96declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
97
98; We don't check any vextractf128 variant with immediate 0 because that's just a move.
99
; Extracts with immediate 1 from %a0 through llvm.x86.avx.vextractf128.pd.256.
; Expected code is vextractf128 $1 from ymm0 into xmm0, followed by a
; vzeroupper before the return (the result is only 128 bits wide).
100define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
101; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1:
102; AVX:       # %bb.0:
103; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
104; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
105; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
106;
107; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1:
108; AVX512VL:       # %bb.0:
109; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
110; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
111; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
112  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
113  ret <2 x double> %res
114}
115declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
116
; Float-vector form of the 128-bit extract test: llvm.x86.avx.vextractf128.ps.256
; with immediate 1. Expected output matches the pd variant: vextractf128 $1,
; then vzeroupper, then the return.
117define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
118; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1:
119; AVX:       # %bb.0:
120; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
121; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
122; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
123;
124; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1:
125; AVX512VL:       # %bb.0:
126; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
127; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
128; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
129  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
130  ret <4 x float> %res
131}
132declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
133
; Integer-vector form of the 128-bit extract test: llvm.x86.avx.vextractf128.si.256
; with immediate 1. The expected instruction is still vextractf128 $1 (same
; bytes as the pd/ps variants), followed by vzeroupper.
134define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
135; AVX-LABEL: test_x86_avx_vextractf128_si_256_1:
136; AVX:       # %bb.0:
137; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
138; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
139; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
140;
141; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1:
142; AVX512VL:       # %bb.0:
143; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
144; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
145; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
146  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
147  ret <4 x i32> %res
148}
149declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
150
151; Verify that high bits of the immediate are masked off. This should be the equivalent
152; of a vextractf128 $0 which should be optimized away, so just check that it's
153; not a vextractf128 of any kind.
; Passes immediate 2 to the pd extract intrinsic. The shared expectations
; contain no extract instruction at all: only the register kill note,
; vzeroupper and the return, so xmm0 is returned as it arrived.
; NOTE(review): the function name spells "extractf128" while the sibling tests
; use "vextractf128"; looks unintentional, confirm before renaming.
154define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
155; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
156; CHECK:       # %bb.0:
157; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
158; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
159; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
160  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
161  ret <2 x double> %res
162}
163
164
; Calls llvm.x86.avx.vbroadcastf128.pd.256 on the pointer %a0. The expectations
; are split per target because the address differs: the i686 runs first load
; the pointer argument from the stack into eax, the x86_64 runs use rdi
; directly. All four expect vbroadcastf128 with shuffle comment mem[0,1,0,1].
165define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
166; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
167; X86-AVX:       # %bb.0:
168; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
169; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
170; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
171; X86-AVX-NEXT:    retl # encoding: [0xc3]
172;
173; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
174; X86-AVX512VL:       # %bb.0:
175; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
176; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
177; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
178; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
179;
180; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
181; X64-AVX:       # %bb.0:
182; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
183; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
184; X64-AVX-NEXT:    retq # encoding: [0xc3]
185;
186; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
187; X64-AVX512VL:       # %bb.0:
188; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
189; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
190; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
191  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
192  ret <4 x double> %res
193}
194declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
195
196
; Float-vector form of the 128-bit broadcast test
; (llvm.x86.avx.vbroadcastf128.ps.256). Expectations mirror the pd variant:
; per-target address setup (stack load into eax on i686, rdi on x86_64) and a
; vbroadcastf128 from memory in every run.
197define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
198; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
199; X86-AVX:       # %bb.0:
200; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
201; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
202; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
203; X86-AVX-NEXT:    retl # encoding: [0xc3]
204;
205; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
206; X86-AVX512VL:       # %bb.0:
207; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
208; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
209; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
210; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
211;
212; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
213; X64-AVX:       # %bb.0:
214; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
215; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
216; X64-AVX-NEXT:    retq # encoding: [0xc3]
217;
218; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
219; X64-AVX512VL:       # %bb.0:
220; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
221; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
222; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
223  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
224  ret <8 x float> %res
225}
226declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
227
228
; Calls llvm.x86.avx.blend.pd.256 with mask 7. The expected instruction is not
; a vblendpd: it is a vblendps with the sources swapped and mask $192, whose
; shuffle comment reads ymm1[0,1,2,3,4,5],ymm0[6,7] (three doubles from %a1,
; the last one from %a0).
229define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
230; CHECK-LABEL: test_x86_avx_blend_pd_256:
231; CHECK:       # %bb.0:
232; CHECK-NEXT:    vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0]
233; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
234; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
235  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
236  ret <4 x double> %res
237}
238declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
239
240
; Calls llvm.x86.avx.blend.ps.256 with mask 7. Expected code keeps the mask and
; operand order: vblendps $7 with shuffle comment ymm1[0,1,2],ymm0[3,4,5,6,7].
241define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
242; CHECK-LABEL: test_x86_avx_blend_ps_256:
243; CHECK:       # %bb.0:
244; CHECK-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
245; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
246; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
247  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
248  ret <8 x float> %res
249}
250declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
251
252
; Calls llvm.x86.avx.dp.ps.256 with an i32 immediate of 7. All runs expect a
; single vdpps $7 on ymm1/ymm0 with the immediate passed through unchanged.
253define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
254; CHECK-LABEL: test_x86_avx_dp_ps_256:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
257; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
258  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
259  ret <8 x float> %res
260}
261declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
262
263
; Calls llvm.x86.sse2.psll.dq with a count of 8. The expected instruction is
; vpslldq $1 and its shuffle comment shows a one-byte shift (a zero followed
; by bytes 0..14), which is consistent with the intrinsic's count being
; expressed in bits.
264define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
265; AVX-LABEL: test_x86_sse2_psll_dq:
266; AVX:       # %bb.0:
267; AVX-NEXT:    vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01]
268; AVX-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
269; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
270;
271; AVX512VL-LABEL: test_x86_sse2_psll_dq:
272; AVX512VL:       # %bb.0:
273; AVX512VL-NEXT:    vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
274; AVX512VL-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
275; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
276  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
277  ret <2 x i64> %res
278}
279declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
280
281
; Right-shift counterpart of the psll.dq test: llvm.x86.sse2.psrl.dq with a
; count of 8. The expected instruction is vpsrldq $1, with a shuffle comment
; showing bytes 1..15 followed by a zero (a one-byte shift).
282define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
283; AVX-LABEL: test_x86_sse2_psrl_dq:
284; AVX:       # %bb.0:
285; AVX-NEXT:    vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01]
286; AVX-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
287; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
288;
289; AVX512VL-LABEL: test_x86_sse2_psrl_dq:
290; AVX512VL:       # %bb.0:
291; AVX512VL-NEXT:    vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
292; AVX512VL-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
293; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
294  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
295  ret <2 x i64> %res
296}
297declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
298
299
; Calls llvm.x86.sse41.blendpd with mask 2. The expected instruction is a
; vblendps with swapped sources and mask $3; its shuffle comment reads
; xmm0[0,1],xmm1[2,3] (low double from %a0, high double from %a1).
300define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
301; CHECK-LABEL: test_x86_sse41_blendpd:
302; CHECK:       # %bb.0:
303; CHECK-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
304; CHECK-NEXT:    # xmm0 = xmm0[0,1],xmm1[2,3]
305; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
306  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
307  ret <2 x double> %res
308}
309declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
310
311
; Calls llvm.x86.sse41.blendps with mask 7. The expected vblendps has its
; sources swapped and the complementary mask $8; the shuffle comment reads
; xmm1[0,1,2],xmm0[3] (three floats from %a1, the last from %a0).
312define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
313; CHECK-LABEL: test_x86_sse41_blendps:
314; CHECK:       # %bb.0:
315; CHECK-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
316; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3]
317; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
318  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
319  ret <4 x float> %res
320}
321declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
322
323
; Calls llvm.x86.sse41.pblendw with mask 7. Expected code is vpblendw $7 with
; the original operand order; the shuffle comment reads
; xmm1[0,1,2],xmm0[3,4,5,6,7].
324define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
325; CHECK-LABEL: test_x86_sse41_pblendw:
326; CHECK:       # %bb.0:
327; CHECK-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
328; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
329; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
330  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
331  ret <8 x i16> %res
332}
333declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
334
335
; Calls llvm.x86.sse41.pmovsxbd (<16 x i8> in, <4 x i32> out). Both feature
; sets are expected to emit a single register-to-register vpmovsxbd.
336define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
337; AVX-LABEL: test_x86_sse41_pmovsxbd:
338; AVX:       # %bb.0:
339; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0]
340; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
341;
342; AVX512VL-LABEL: test_x86_sse41_pmovsxbd:
343; AVX512VL:       # %bb.0:
344; AVX512VL-NEXT:    vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
345; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
346  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
347  ret <4 x i32> %res
348}
349declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
350
351
; Calls llvm.x86.sse41.pmovsxbq (<16 x i8> in, <2 x i64> out). Both feature
; sets are expected to emit a single register-to-register vpmovsxbq.
352define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
353; AVX-LABEL: test_x86_sse41_pmovsxbq:
354; AVX:       # %bb.0:
355; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0]
356; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
357;
358; AVX512VL-LABEL: test_x86_sse41_pmovsxbq:
359; AVX512VL:       # %bb.0:
360; AVX512VL-NEXT:    vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
361; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
362  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
363  ret <2 x i64> %res
364}
365declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
366
367
; Calls llvm.x86.sse41.pmovsxbw (<16 x i8> in, <8 x i16> out). Both feature
; sets are expected to emit a single register-to-register vpmovsxbw.
368define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
369; AVX-LABEL: test_x86_sse41_pmovsxbw:
370; AVX:       # %bb.0:
371; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0]
372; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
373;
374; AVX512VL-LABEL: test_x86_sse41_pmovsxbw:
375; AVX512VL:       # %bb.0:
376; AVX512VL-NEXT:    vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
377; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
378  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
379  ret <8 x i16> %res
380}
381declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
382
383
; Calls llvm.x86.sse41.pmovsxdq (<4 x i32> in, <2 x i64> out). Both feature
; sets are expected to emit a single register-to-register vpmovsxdq.
384define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
385; AVX-LABEL: test_x86_sse41_pmovsxdq:
386; AVX:       # %bb.0:
387; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0]
388; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
389;
390; AVX512VL-LABEL: test_x86_sse41_pmovsxdq:
391; AVX512VL:       # %bb.0:
392; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
393; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
394  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
395  ret <2 x i64> %res
396}
397declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
398
399
; Calls llvm.x86.sse41.pmovsxwd (<8 x i16> in, <4 x i32> out). Both feature
; sets are expected to emit a single register-to-register vpmovsxwd.
400define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
401; AVX-LABEL: test_x86_sse41_pmovsxwd:
402; AVX:       # %bb.0:
403; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0]
404; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
405;
406; AVX512VL-LABEL: test_x86_sse41_pmovsxwd:
407; AVX512VL:       # %bb.0:
408; AVX512VL-NEXT:    vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
409; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
410  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
411  ret <4 x i32> %res
412}
413declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
414
415
; Calls llvm.x86.sse41.pmovsxwq (<8 x i16> in, <2 x i64> out). Both feature
; sets are expected to emit a single register-to-register vpmovsxwq.
416define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
417; AVX-LABEL: test_x86_sse41_pmovsxwq:
418; AVX:       # %bb.0:
419; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0]
420; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
421;
422; AVX512VL-LABEL: test_x86_sse41_pmovsxwq:
423; AVX512VL:       # %bb.0:
424; AVX512VL-NEXT:    vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
425; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
426  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
427  ret <2 x i64> %res
428}
429declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
430
431
; Calls llvm.x86.sse41.pmovzxbd (<16 x i8> in, <4 x i32> out). Expected code is
; a single vpmovzxbd; its shuffle comment shows source bytes 0..3 each followed
; by three zero bytes.
432define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
433; AVX-LABEL: test_x86_sse41_pmovzxbd:
434; AVX:       # %bb.0:
435; AVX-NEXT:    vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0]
436; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
437; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
438;
439; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
440; AVX512VL:       # %bb.0:
441; AVX512VL-NEXT:    vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
442; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
443; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
444  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
445  ret <4 x i32> %res
446}
447declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
448
449
; Calls llvm.x86.sse41.pmovzxbq (<16 x i8> in, <2 x i64> out). Expected code is
; a single vpmovzxbq; its shuffle comment shows source bytes 0 and 1 each
; followed by seven zero bytes.
450define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
451; AVX-LABEL: test_x86_sse41_pmovzxbq:
452; AVX:       # %bb.0:
453; AVX-NEXT:    vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0]
454; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
455; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
456;
457; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
458; AVX512VL:       # %bb.0:
459; AVX512VL-NEXT:    vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
460; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
461; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
462  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
463  ret <2 x i64> %res
464}
465declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
466
467
; Calls llvm.x86.sse41.pmovzxbw (<16 x i8> in, <8 x i16> out). Expected code is
; a single vpmovzxbw; its shuffle comment shows source bytes 0..7 each followed
; by one zero byte.
468define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
469; AVX-LABEL: test_x86_sse41_pmovzxbw:
470; AVX:       # %bb.0:
471; AVX-NEXT:    vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0]
472; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
473; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
474;
475; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
476; AVX512VL:       # %bb.0:
477; AVX512VL-NEXT:    vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
478; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
479; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
480  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
481  ret <8 x i16> %res
482}
483declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
484
485
; Calls llvm.x86.sse41.pmovzxdq (<4 x i32> in, <2 x i64> out). Expected code is
; a single vpmovzxdq; its shuffle comment shows source elements 0 and 1 each
; followed by one zero element.
486define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
487; AVX-LABEL: test_x86_sse41_pmovzxdq:
488; AVX:       # %bb.0:
489; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0]
490; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
491; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
492;
493; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
494; AVX512VL:       # %bb.0:
495; AVX512VL-NEXT:    vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
496; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
497; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
498  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
499  ret <2 x i64> %res
500}
501declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
502
503
; Calls llvm.x86.sse41.pmovzxwd (<8 x i16> in, <4 x i32> out). Expected code is
; a single vpmovzxwd; its shuffle comment shows source elements 0..3 each
; followed by one zero element.
504define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
505; AVX-LABEL: test_x86_sse41_pmovzxwd:
506; AVX:       # %bb.0:
507; AVX-NEXT:    vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0]
508; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
509; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
510;
511; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
512; AVX512VL:       # %bb.0:
513; AVX512VL-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
514; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
515; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
516  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
517  ret <4 x i32> %res
518}
519declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
520
521
; Calls llvm.x86.sse41.pmovzxwq (<8 x i16> in, <2 x i64> out). Expected code is
; a single vpmovzxwq; its shuffle comment shows source elements 0 and 1 each
; followed by three zero elements.
522define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
523; AVX-LABEL: test_x86_sse41_pmovzxwq:
524; AVX:       # %bb.0:
525; AVX-NEXT:    vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0]
526; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
527; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
528;
529; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
530; AVX512VL:       # %bb.0:
531; AVX512VL-NEXT:    vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
532; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
533; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
534  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
535  ret <2 x i64> %res
536}
537declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
538
539
; Calls llvm.x86.sse2.cvtdq2pd (<4 x i32> in, <2 x double> out). Both feature
; sets are expected to emit a single 128-bit vcvtdq2pd from xmm0 to xmm0.
540define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
541; AVX-LABEL: test_x86_sse2_cvtdq2pd:
542; AVX:       # %bb.0:
543; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
544; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
545;
546; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
547; AVX512VL:       # %bb.0:
548; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
549; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
550  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
551  ret <2 x double> %res
552}
553declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
554
555
; Calls llvm.x86.avx.cvtdq2.pd.256 (<4 x i32> in, <4 x double> out). Both
; feature sets are expected to emit a single vcvtdq2pd that widens xmm0 into
; ymm0.
556define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
557; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
558; AVX:       # %bb.0:
559; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0]
560; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
561;
562; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
563; AVX512VL:       # %bb.0:
564; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
565; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
566  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
567  ret <4 x double> %res
568}
569declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
570
571
; Calls llvm.x86.sse2.cvtps2pd (<4 x float> in, <2 x double> out). Both feature
; sets are expected to emit a single 128-bit vcvtps2pd from xmm0 to xmm0.
572define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
573; AVX-LABEL: test_x86_sse2_cvtps2pd:
574; AVX:       # %bb.0:
575; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
576; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
577;
578; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
579; AVX512VL:       # %bb.0:
580; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
581; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
582  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
583  ret <2 x double> %res
584}
585declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
586
587
; Calls llvm.x86.avx.cvt.ps2.pd.256 (<4 x float> in, <4 x double> out). Both
; feature sets are expected to emit a single vcvtps2pd that widens xmm0 into
; ymm0.
588define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
589; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
590; AVX:       # %bb.0:
591; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
592; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
593;
594; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
595; AVX512VL:       # %bb.0:
596; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
597; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
598  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
599  ret <4 x double> %res
600}
601declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
602
603
; Adds 1 to every byte of %a1 and stores the sum through %a0 with
; llvm.x86.sse2.storeu.dq. The expected code builds an all-ones register with
; vpcmpeqd and subtracts it with vpsubb (rather than adding a constant), then
; stores with vmovdqu. Expectations are split per target because the i686 runs
; first load the pointer from the stack, while the x86_64 runs store via rdi.
604define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
605  ; add operation forces the execution domain.
606; X86-AVX-LABEL: test_x86_sse2_storeu_dq:
607; X86-AVX:       # %bb.0:
608; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
609; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
610; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
611; X86-AVX-NEXT:    vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00]
612; X86-AVX-NEXT:    retl # encoding: [0xc3]
613;
614; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
615; X86-AVX512VL:       # %bb.0:
616; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
617; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
618; X86-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
619; X86-AVX512VL-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
620; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
621;
622; X64-AVX-LABEL: test_x86_sse2_storeu_dq:
623; X64-AVX:       # %bb.0:
624; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
625; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
626; X64-AVX-NEXT:    vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07]
627; X64-AVX-NEXT:    retq # encoding: [0xc3]
628;
629; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
630; X64-AVX512VL:       # %bb.0:
631; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
632; X64-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
633; X64-AVX512VL-NEXT:    vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
634; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
635  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
636  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
637  ret void
638}
639declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
640
641
; Adds the constant vector <0x0, 0x4200000000000000> to %a1 and stores the sum
; to %a0 through the llvm.x86.sse2.storeu.pd intrinsic. All four run
; configurations are expected to emit a 128-bit vmovupd store.
642define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
643  ; The fadd forces the double-precision execution domain, so the store is expected to be vmovupd.
644; X86-AVX-LABEL: test_x86_sse2_storeu_pd:
645; X86-AVX:       # %bb.0:
646; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
647; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
648; X86-AVX-NEXT:    vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
649; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
650; X86-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
651; X86-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
652; X86-AVX-NEXT:    vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00]
653; X86-AVX-NEXT:    retl # encoding: [0xc3]
654;
655; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
656; X86-AVX512VL:       # %bb.0:
657; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
658; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
659; X86-AVX512VL-NEXT:    vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
660; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
661; X86-AVX512VL-NEXT:    # xmm1 = xmm1[0],mem[0]
662; X86-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
663; X86-AVX512VL-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
664; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
665;
666; X64-AVX-LABEL: test_x86_sse2_storeu_pd:
667; X64-AVX:       # %bb.0:
668; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
669; X64-AVX-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
670; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
671; X64-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
672; X64-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
673; X64-AVX-NEXT:    vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07]
674; X64-AVX-NEXT:    retq # encoding: [0xc3]
675;
676; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
677; X64-AVX512VL:       # %bb.0:
678; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
679; X64-AVX512VL-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
680; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
681; X64-AVX512VL-NEXT:    # xmm1 = xmm1[0],mem[0]
682; X64-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
683; X64-AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
684; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
685  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
686  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
687  ret void
688}
689declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
690
691
; Stores %a1 unmodified to %a0 through the llvm.x86.sse.storeu.ps intrinsic.
; No arithmetic is needed to pin the execution domain here: all four run
; configurations are expected to emit a 128-bit vmovups store.
692define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
693; X86-AVX-LABEL: test_x86_sse_storeu_ps:
694; X86-AVX:       # %bb.0:
695; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
696; X86-AVX-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
697; X86-AVX-NEXT:    retl # encoding: [0xc3]
698;
699; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps:
700; X86-AVX512VL:       # %bb.0:
701; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
702; X86-AVX512VL-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
703; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
704;
705; X64-AVX-LABEL: test_x86_sse_storeu_ps:
706; X64-AVX:       # %bb.0:
707; X64-AVX-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
708; X64-AVX-NEXT:    retq # encoding: [0xc3]
709;
710; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps:
711; X64-AVX512VL:       # %bb.0:
712; X64-AVX512VL-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
713; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
714  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
715  ret void
716}
717declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
718
719
; Adds 1 to every byte of %a1 and stores the 256-bit sum to %a0 through the
; llvm.x86.avx.storeu.dq.256 intrinsic. The AVX-only configurations are
; expected to split the add into two 128-bit vpsubb instructions followed by
; two 128-bit vmovdqu stores; the AVX512VL configurations are expected to use a
; single 256-bit vpsubb and a single 256-bit vmovdqu.
; NOTE(review): the assertions below expect vmovdqu rather than vmovups, so the
; FIXME inside the function may be stale - confirm before relying on it.
720define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
721  ; FIXME: Unfortunately, the execution domain fix pass changes this to vmovups, and it's hard to force with no 256-bit integer instructions.
722  ; The integer add forces the integer execution domain.
723; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
724; X86-AVX:       # %bb.0:
725; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
726; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
727; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
728; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
729; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
730; X86-AVX-NEXT:    vmovdqu %xmm0, 16(%eax) # encoding: [0xc5,0xfa,0x7f,0x40,0x10]
731; X86-AVX-NEXT:    vmovdqu %xmm2, (%eax) # encoding: [0xc5,0xfa,0x7f,0x10]
732; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
733; X86-AVX-NEXT:    retl # encoding: [0xc3]
734;
735; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
736; X86-AVX512VL:       # %bb.0:
737; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
738; X86-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
739; X86-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
740; X86-AVX512VL-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
741; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
742; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
743;
744; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
745; X64-AVX:       # %bb.0:
746; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
747; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
748; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
749; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
750; X64-AVX-NEXT:    vmovdqu %xmm0, 16(%rdi) # encoding: [0xc5,0xfa,0x7f,0x47,0x10]
751; X64-AVX-NEXT:    vmovdqu %xmm2, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x17]
752; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
753; X64-AVX-NEXT:    retq # encoding: [0xc3]
754;
755; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
756; X64-AVX512VL:       # %bb.0:
757; X64-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
758; X64-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
759; X64-AVX512VL-NEXT:    vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07]
760; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
761; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
762  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
763  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
764  ret void
765}
766declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
767
768
; Adds an all-zero <4 x double> constant to %a1 and stores the 256-bit sum to
; %a0 through the llvm.x86.avx.storeu.pd.256 intrinsic. All four run
; configurations are expected to emit a 256-bit vmovupd store.
769define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
770  ; The fadd forces the double-precision execution domain, so the store is expected to be vmovupd.
771; X86-AVX-LABEL: test_x86_avx_storeu_pd_256:
772; X86-AVX:       # %bb.0:
773; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
774; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
775; X86-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
776; X86-AVX-NEXT:    vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00]
777; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
778; X86-AVX-NEXT:    retl # encoding: [0xc3]
779;
780; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
781; X86-AVX512VL:       # %bb.0:
782; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
783; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
784; X86-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
785; X86-AVX512VL-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
786; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
787; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
788;
789; X64-AVX-LABEL: test_x86_avx_storeu_pd_256:
790; X64-AVX:       # %bb.0:
791; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
792; X64-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
793; X64-AVX-NEXT:    vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07]
794; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
795; X64-AVX-NEXT:    retq # encoding: [0xc3]
796;
797; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
798; X64-AVX512VL:       # %bb.0:
799; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
800; X64-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
801; X64-AVX512VL-NEXT:    vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07]
802; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
803; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
804  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
805  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
806  ret void
807}
808declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
809
810
; Stores %a1 unmodified to %a0 through the llvm.x86.avx.storeu.ps.256
; intrinsic. All four run configurations are expected to emit a 256-bit
; vmovups store followed by vzeroupper.
811define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
812; X86-AVX-LABEL: test_x86_avx_storeu_ps_256:
813; X86-AVX:       # %bb.0:
814; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
815; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
816; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
817; X86-AVX-NEXT:    retl # encoding: [0xc3]
818;
819; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
820; X86-AVX512VL:       # %bb.0:
821; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
822; X86-AVX512VL-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
823; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
824; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
825;
826; X64-AVX-LABEL: test_x86_avx_storeu_ps_256:
827; X64-AVX:       # %bb.0:
828; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
829; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
830; X64-AVX-NEXT:    retq # encoding: [0xc3]
831;
832; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
833; X64-AVX512VL:       # %bb.0:
834; X64-AVX512VL-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
835; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
836; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
837  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
838  ret void
839}
840declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
841
842
; Calls llvm.x86.avx.vpermil.pd on %a0 with immediate 1. The expected
; instruction is vpermilpd $1 on xmm0, whose decoded shuffle is xmm0[1,0]
; (the two doubles swapped).
843define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
844; AVX-LABEL: test_x86_avx_vpermil_pd:
845; AVX:       # %bb.0:
846; AVX-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
847; AVX-NEXT:    # xmm0 = xmm0[1,0]
848; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
849;
850; AVX512VL-LABEL: test_x86_avx_vpermil_pd:
851; AVX512VL:       # %bb.0:
852; AVX512VL-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
853; AVX512VL-NEXT:    # xmm0 = xmm0[1,0]
854; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
855  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
856  ret <2 x double> %res
857}
858declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
859
860
; Calls llvm.x86.avx.vpermil.pd.256 on %a0 with immediate 7. The expected
; instruction is vpermilpd $7 on ymm0, whose decoded shuffle is
; ymm0[1,1,3,2].
861define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
862; AVX-LABEL: test_x86_avx_vpermil_pd_256:
863; AVX:       # %bb.0:
864; AVX-NEXT:    vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
865; AVX-NEXT:    # ymm0 = ymm0[1,1,3,2]
866; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
867;
868; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256:
869; AVX512VL:       # %bb.0:
870; AVX512VL-NEXT:    vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
871; AVX512VL-NEXT:    # ymm0 = ymm0[1,1,3,2]
872; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
873  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
874  ret <4 x double> %res
875}
876declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
877
878
; Calls llvm.x86.avx.vpermil.ps on %a0 with immediate 7. The expected
; instruction is vpermilps $7 on xmm0, whose decoded shuffle is
; xmm0[3,1,0,0].
879define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
880; AVX-LABEL: test_x86_avx_vpermil_ps:
881; AVX:       # %bb.0:
882; AVX-NEXT:    vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
883; AVX-NEXT:    # xmm0 = xmm0[3,1,0,0]
884; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
885;
886; AVX512VL-LABEL: test_x86_avx_vpermil_ps:
887; AVX512VL:       # %bb.0:
888; AVX512VL-NEXT:    vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
889; AVX512VL-NEXT:    # xmm0 = xmm0[3,1,0,0]
890; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
891  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
892  ret <4 x float> %res
893}
894declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
895
896
; Calls llvm.x86.avx.vpermil.ps.256 on %a0 with immediate 7. The expected
; instruction is vpermilps $7 on ymm0, whose decoded shuffle is
; ymm0[3,1,0,0,7,5,4,4] (the same pattern in each 128-bit half).
897define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
898; AVX-LABEL: test_x86_avx_vpermil_ps_256:
899; AVX:       # %bb.0:
900; AVX-NEXT:    vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
901; AVX-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
902; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
903;
904; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256:
905; AVX512VL:       # %bb.0:
906; AVX512VL-NEXT:    vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
907; AVX512VL-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
908; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
909  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
910  ret <8 x float> %res
911}
912declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
913
914
; Calls llvm.x86.avx.vperm2f128.pd.256 on (%a0, %a1) with immediate 3. The
; expected instruction is not a literal copy of the call: it is
; vperm2f128 $33 with ymm0 and ymm1 as sources in that (AT&T) order, and its
; decoded shuffle is ymm1[2,3],ymm0[0,1].
915define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
916; AVX-LABEL: test_x86_avx_vperm2f128_pd_256:
917; AVX:       # %bb.0:
918; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
919; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
920; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
921;
922; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256:
923; AVX512VL:       # %bb.0:
924; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
925; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
926; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
927  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1]
928  ret <4 x double> %res
929}
930declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
931
932
; Calls llvm.x86.avx.vperm2f128.ps.256 on (%a0, %a1) with immediate 3. The
; expected instruction is vperm2f128 $33 with ymm0 and ymm1 as sources in that
; (AT&T) order; its decoded shuffle is ymm1[2,3],ymm0[0,1], printed in 128-bit
; lane-pair notation even though the element type is float.
933define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
934; AVX-LABEL: test_x86_avx_vperm2f128_ps_256:
935; AVX:       # %bb.0:
936; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
937; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
938; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
939;
940; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256:
941; AVX512VL:       # %bb.0:
942; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
943; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
944; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
945  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1]
946  ret <8 x float> %res
947}
948declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
949
950
; Calls llvm.x86.avx.vperm2f128.si.256 on (%a0, %a1) with immediate 3. The two
; groups of assertions differ in the expected mnemonic: the AVX-only runs
; expect vperm2f128 $33, while the AVX512VL runs expect the integer form
; vperm2i128 $33. Both decode to ymm1[2,3],ymm0[0,1].
951define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
952; AVX-LABEL: test_x86_avx_vperm2f128_si_256:
953; AVX:       # %bb.0:
954; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
955; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
956; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
957;
958; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256:
959; AVX512VL:       # %bb.0:
960; AVX512VL-NEXT:    vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21]
961; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
962; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
963  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1]
964  ret <8 x i32> %res
965}
966declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
967
968
; Calls llvm.x86.avx.cvtdq2.ps.256 to turn the <8 x i32> argument into
; <8 x float>. Every run configuration is expected to emit a single 256-bit
; vcvtdq2ps on ymm0.
969define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
970; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
971; AVX:       # %bb.0:
972; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
973; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
974;
975; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
976; AVX512VL:       # %bb.0:
977; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
978; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
979  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
980  ret <8 x float> %res
981}
982declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
983