; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512DQ
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512DQVL

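; KADD{W,B} performs an integer addition of two mask registers. The tests below
; check that the llvm.x86.avx512.kadd.* intrinsics select kaddw/kaddb and that
; the sum feeds a kortest-based compare with zero.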
define i32 @test_int_x86_avx512_kadd_w(<16 x i32> %A, <16 x i32> %B) nounwind {
; CHECK-LABEL: test_int_x86_avx512_kadd_w:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    kaddw %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4a,0xc1]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestw %k0, %k0 # encoding: [0xc5,0xf8,0x98,0xc0]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = icmp ne <16 x i32> %A, zeroinitializer
  %1 = icmp ne <16 x i32> %B, zeroinitializer
  %2 = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> %0, <16 x i1> %1)
  %3 = bitcast <16 x i1> %2 to i16
  %4 = icmp eq i16 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1>, <16 x i1>)

define i32 @test_int_x86_avx512_kadd_b(<8 x i64> %A, <8 x i64> %B) nounwind {
; CHECK-LABEL: test_int_x86_avx512_kadd_b:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    kaddb %k1, %k0, %k0 # encoding: [0xc5,0xfd,0x4a,0xc1]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestb %k0, %k0 # encoding: [0xc5,0xf9,0x98,0xc0]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = icmp ne <8 x i64> %A, zeroinitializer
  %1 = icmp ne <8 x i64> %B, zeroinitializer
  %2 = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> %0, <8 x i1> %1)
  %3 = bitcast <8 x i1> %2 to i8
  %4 = icmp eq i8 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>)

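; KTEST sets ZF when (src1 AND src2) == 0 and CF when (src1 AND NOT src2) == 0,
; so ktestz lowers to ktest+sete and ktestc lowers to ktest+setb.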
define i32 @test_x86_avx512_ktestc_w(<16 x i32> %A, <16 x i32> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <16 x i32> %A, zeroinitializer
  %2 = icmp ne <16 x i32> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.w(<16 x i1>, <16 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_w(<16 x i32> %A, <16 x i32> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <16 x i32> %A, zeroinitializer
  %2 = icmp ne <16 x i32> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.w(<16 x i1>, <16 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestc_b(<8 x i64> %A, <8 x i64> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <8 x i64> %A, zeroinitializer
  %2 = icmp ne <8 x i64> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.b(<8 x i1>, <8 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_b(<8 x i64> %A, <8 x i64> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <8 x i64> %A, zeroinitializer
  %2 = icmp ne <8 x i64> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.b(<8 x i1>, <8 x i1>) nounwind readnone

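; The trailing i32 operand of the masked conversion intrinsics is the rounding
; control: 4 = CUR_DIRECTION (no embedded rounding), 8 = {rn-sae}, 9 = {rd-sae},
; 10 = {ru-sae}, 11 = {rz-sae}. Each test pairs a masked {ru-sae} conversion
; with an unmasked {rn-sae} one and adds the results.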
declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x7b,0xc8]
; X86-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7b,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x7b,0xc8]
; X64-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7b,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x79,0xc8]
; X86-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x79,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x79,0xc8]
; X64-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x79,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x7b,0xc8]
; X86-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7b,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x7b,0xc8]
; X64-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7b,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x79,0xc8]
; X86-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x79,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x79,0xc8]
; X64-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x79,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64>, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8]
; X86-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8]
; X64-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt = sitofp <8 x i64> %x0 to <8 x double>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
  %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x double> %2, %3
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64>, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X86-AVX512DQ-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X86-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X86-AVX512DQVL-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X86-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X64-AVX512DQ-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X64-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X64-AVX512DQVL-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X64-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %cvt = sitofp <8 x i64> %x0 to <8 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
  %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x float> %2, %3
  ret <8 x float> %res2
}

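; Truncating conversions have no embedded rounding mode, so for vcvtt* the i32
; operand only controls exception suppression: 4 = default, 8 = {sae}.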
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
; X86-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
; X64-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
; X86-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
; X64-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
; X86-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
; X64-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
; X86-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
; X64-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64>, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8]
; X86-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8]
; X64-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt = uitofp <8 x i64> %x0 to <8 x double>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
  %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x double> %2, %3
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64>, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X86-AVX512DQ-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X86-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X86-AVX512DQVL-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X86-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X64-AVX512DQ-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X64-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X64-AVX512DQVL-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X64-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %cvt = uitofp <8 x i64> %x0 to <8 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
  %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x float> %2, %3
  ret <8 x float> %res2
}

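; For vreduce*/vrange* the leading immediate is the instruction's imm8 control
; and the final i32 again selects {sae} (8) versus the current mode (4).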
declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x56,0xc8,0x08]
; X86-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x56,0xc0,0x04]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x56,0xc8,0x08]
; X64-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x56,0xc0,0x04]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x56,0xc8,0x2c]
; X86-NEXT:    vreduceps $11, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x56,0xc0,0x0b]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x56,0xc8,0x2c]
; X64-NEXT:    vreduceps $11, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x56,0xc0,0x0b]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x50,0xd1,0x08]
; X86-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x50,0xc1,0x04]
; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x50,0xd1,0x08]
; X64-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x50,0xc1,0x04]
; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x50,0xd1,0x58]
; X86-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x50,0xc1,0x04]
; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x50,0xd1,0x58]
; X64-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x50,0xc1,0x04]
; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X86-AVX512DQ-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe8,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X86-AVX512DQVL-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X64-AVX512DQ-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe8,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X64-AVX512DQVL-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_ss:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X86-AVX512DQ-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x58,0xd3]
; X86-AVX512DQ-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X86-AVX512DQ-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc2]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_ss:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X86-AVX512DQVL-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xd3]
; X86-AVX512DQVL-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X86-AVX512DQVL-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc2]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_ss:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X64-AVX512DQ-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x58,0xd3]
; X64-AVX512DQ-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X64-AVX512DQ-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc2]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_ss:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X64-AVX512DQVL-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xd3]
; X64-AVX512DQVL-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X64-AVX512DQVL-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc2]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 5, i32 8)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 6, i32 4)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X86-AVX512DQ-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X86-AVX512DQVL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X64-AVX512DQ-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X64-AVX512DQVL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X86-AVX512DQ-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x58,0xd3]
; X86-AVX512DQ-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X86-AVX512DQ-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc2]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X86-AVX512DQVL-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X86-AVX512DQVL-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X86-AVX512DQVL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X64-AVX512DQ-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x58,0xd3]
; X64-AVX512DQ-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X64-AVX512DQ-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc2]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X64-AVX512DQVL-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X64-AVX512DQVL-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X64-AVX512DQVL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 5, i32 8)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 6, i32 4)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

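; vfpclass tests each element against the category mask in the immediate
; (per the ISA encoding, bit 1 = positive zero, bit 2 = negative zero). Running
; the second vfpclass under the first result's mask (%k1) implements the AND of
; the two <N x i1> results.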
declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 4)
  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 2)
  %1 = and <8 x i1> %res1, %res
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)

define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 4)
  %res1 = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 2)
  %1 = and <16 x i1> %res1, %res
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x04]
; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_sd_load(<2 x double>* %x0ptr) {
; X86-LABEL: test_int_x86_avx512_mask_fpclass_sd_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfpclasssd $4, (%eax), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x00,0x04]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_fpclass_sd_load:
; X64:       # %bb.0:
; X64-NEXT:    vfpclasssd $4, (%rdi), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x07,0x04]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x double>, <2 x double>* %x0ptr
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x04]
; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_ss_load(<4 x float>* %x0ptr, i8 %x1) {
; X86-LABEL: test_int_x86_avx512_mask_fpclass_ss_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfpclassss $4, (%eax), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x00,0x04]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_fpclass_ss_load:
; X64:       # %bb.0:
; X64-NEXT:    vfpclassss $4, (%rdi), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x07,0x04]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  ret i8 %res
}
