; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512DQ
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512DQVL

define i32 @test_int_x86_avx512_kadd_w(<16 x i32> %A, <16 x i32> %B) nounwind {
; CHECK-LABEL: test_int_x86_avx512_kadd_w:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    kaddw %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4a,0xc1]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestw %k0, %k0 # encoding: [0xc5,0xf8,0x98,0xc0]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = icmp ne <16 x i32> %A, zeroinitializer
  %1 = icmp ne <16 x i32> %B, zeroinitializer
  %2 = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> %0, <16 x i1> %1)
  %3 = bitcast <16 x i1> %2 to i16
  %4 = icmp eq i16 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1>, <16 x i1>)

define i32 @test_int_x86_avx512_kadd_b(<8 x i64> %A, <8 x i64> %B) nounwind {
; CHECK-LABEL: test_int_x86_avx512_kadd_b:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    kaddb %k1, %k0, %k0 # encoding: [0xc5,0xfd,0x4a,0xc1]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestb %k0, %k0 # encoding: [0xc5,0xf9,0x98,0xc0]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = icmp ne <8 x i64> %A, zeroinitializer
  %1 = icmp ne <8 x i64> %B, zeroinitializer
  %2 = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> %0, <8 x i1> %1)
  %3 = bitcast <8 x i1> %2 to i8
  %4 = icmp eq i8 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>)

define i32 @test_x86_avx512_ktestc_w(<16 x i32> %A, <16 x i32> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <16 x i32> %A, zeroinitializer
  %2 = icmp ne <16 x i32> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.w(<16 x i1>, <16 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_w(<16 x i32> %A, <16 x i32> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <16 x i32> %A, zeroinitializer
  %2 = icmp ne <16 x i32> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.w(<16 x i1>, <16 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestc_b(<8 x i64> %A, <8 x i64> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <8 x i64> %A, zeroinitializer
  %2 = icmp ne <8 x i64> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.b(<8 x i1>, <8 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_b(<8 x i64> %A, <8 x i64> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <8 x i64> %A, zeroinitializer
  %2 = icmp ne <8 x i64> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.b(<8 x i1>, <8 x i1>) nounwind readnone

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x7b,0xc8]
; X86-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7b,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x7b,0xc8]
; X64-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7b,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x79,0xc8]
; X86-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x79,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x59,0x79,0xc8]
; X64-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x79,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x7b,0xc8]
; X86-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7b,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x7b,0xc8]
; X64-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7b,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x79,0xc8]
; X86-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x79,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x79,0xc8]
; X64-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x79,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64>, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8]
; X86-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8]
; X64-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt = sitofp <8 x i64> %x0 to <8 x double>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
  %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x double> %2, %3
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64>, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X86-AVX512DQ-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X86-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X86-AVX512DQVL-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X86-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X64-AVX512DQ-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X64-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
; X64-AVX512DQVL-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
; X64-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %cvt = sitofp <8 x i64> %x0 to <8 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
  %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x float> %2, %3
  ret <8 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
; X86-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
; X64-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
; X86-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
; X64-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
; X86-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
; X64-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
; X86-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
; X64-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64>, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8]
; X86-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8]
; X64-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt = uitofp <8 x i64> %x0 to <8 x double>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
  %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x double> %2, %3
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64>, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X86-AVX512DQ-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X86-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X86-AVX512DQVL-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X86-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X64-AVX512DQ-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X64-AVX512DQ-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
; X64-AVX512DQVL-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
; X64-AVX512DQVL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %cvt = uitofp <8 x i64> %x0 to <8 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
  %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
  %res2 = fadd <8 x float> %2, %3
  ret <8 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x56,0xc8,0x08]
; X86-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x56,0xc0,0x04]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x56,0xc8,0x08]
; X64-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x56,0xc0,0x04]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x56,0xc8,0x2c]
; X86-NEXT:    vreduceps $11, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x56,0xc0,0x0b]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x56,0xc8,0x2c]
; X64-NEXT:    vreduceps $11, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x56,0xc0,0x0b]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x50,0xd1,0x08]
; X86-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x50,0xc1,0x04]
; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x50,0xd1,0x08]
; X64-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x50,0xc1,0x04]
; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x50,0xd1,0x58]
; X86-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x50,0xc1,0x04]
; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x50,0xd1,0x58]
; X64-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x50,0xc1,0x04]
; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X86-AVX512DQ-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe8,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X86-AVX512DQVL-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X64-AVX512DQ-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe8,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_ss:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x57,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x18,0x57,0xc1,0x04]
; X64-AVX512DQVL-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_ss:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X86-AVX512DQ-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x58,0xd3]
; X86-AVX512DQ-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X86-AVX512DQ-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc2]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_ss:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X86-AVX512DQVL-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xd3]
; X86-AVX512DQVL-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X86-AVX512DQVL-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc2]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_ss:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X64-AVX512DQ-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x58,0xd3]
; X64-AVX512DQ-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X64-AVX512DQ-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc2]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_ss:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x19,0x51,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vrangess $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x18,0x51,0xd9,0x05]
; X64-AVX512DQVL-NEXT:    vaddps %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xd3]
; X64-AVX512DQVL-NEXT:    vrangess $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x51,0xc1,0x06]
; X64-AVX512DQVL-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc2]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 5, i32 8)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 6, i32 4)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X86-AVX512DQ-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0x58,0xc0]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X86-AVX512DQVL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X64-AVX512DQ-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0x58,0xc0]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_reduce_sd:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x57,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x57,0xc1,0x04]
; X64-AVX512DQVL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; X86-AVX512DQ:       # %bb.0:
; X86-AVX512DQ-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQ-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X86-AVX512DQ-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X86-AVX512DQ-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x58,0xd3]
; X86-AVX512DQ-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X86-AVX512DQ-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc2]
; X86-AVX512DQ-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; X86-AVX512DQVL:       # %bb.0:
; X86-AVX512DQVL-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-AVX512DQVL-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X86-AVX512DQVL-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X86-AVX512DQVL-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X86-AVX512DQVL-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X86-AVX512DQVL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-AVX512DQVL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; X64-AVX512DQ:       # %bb.0:
; X64-AVX512DQ-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQ-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X64-AVX512DQ-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X64-AVX512DQ-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x58,0xd3]
; X64-AVX512DQ-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X64-AVX512DQ-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc2]
; X64-AVX512DQ-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; X64-AVX512DQVL:       # %bb.0:
; X64-AVX512DQVL-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-AVX512DQVL-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X64-AVX512DQVL-NEXT:    vrangesd $5, {sae}, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xd9,0x05]
; X64-AVX512DQVL-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X64-AVX512DQVL-NEXT:    vrangesd $6, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xc1,0x06]
; X64-AVX512DQVL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-AVX512DQVL-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 5, i32 8)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 6, i32 4)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 4)
  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 2)
  %1 = and <8 x i1> %res1, %res
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)

define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 4)
  %res1 = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 2)
  %1 = and <16 x i1> %res1, %res
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x04]
; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_sd_load(<2 x double>* %x0ptr) {
; X86-LABEL: test_int_x86_avx512_mask_fpclass_sd_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfpclasssd $4, (%eax), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x00,0x04]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_fpclass_sd_load:
; X64:       # %bb.0:
; X64-NEXT:    vfpclasssd $4, (%rdi), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x07,0x04]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x double>, <2 x double>* %x0ptr
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x04]
; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_ss_load(<4 x float>* %x0ptr, i8 %x1) {
; X86-LABEL: test_int_x86_avx512_mask_fpclass_ss_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfpclassss $4, (%eax), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x00,0x04]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_fpclass_ss_load:
; X64:       # %bb.0:
; X64-NEXT:    vfpclassss $4, (%rdi), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x07,0x04]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  ret i8 %res
}