; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
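;
; As a rough guide to where this IR comes from, the C below is a minimal
; sketch against the usual <immintrin.h> AVX512F intrinsics (the function
; names are illustrative, not part of the test). The constant i32 8 passed
; to the intrinsics throughout this file is _MM_FROUND_TO_NEAREST_INT |
; _MM_FROUND_NO_EXC, which the assembly printer renders as {rn-sae}.
;
;   #include <immintrin.h>
;
;   __m512d fmadd_rn(__m512d a, __m512d b, __m512d c) {
;     return _mm512_fmadd_round_pd(
;         a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
;   }
;
;   __m512d mask_fmadd_rn(__m512d a, __mmask8 u, __m512d b, __m512d c) {
;     // Lanes cleared in u keep the corresponding value of a.
;     return _mm512_mask_fmadd_round_pd(
;         a, u, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
;   }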
define <8 x double> @test_mm512_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmadd_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  ret <8 x double> %0
}

declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) #1

define <8 x double> @test_mm512_mask_fmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb8,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb8,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa8,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa8,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}
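; A note on the masking variants above, which repeat throughout this file:
; the mask form selects between the FMA result and %__A (the operand the
; intrinsic merges into), mask3 selects against %__C (hence the 231 form and
; the trailing vmovapd to return the result in zmm0), and maskz selects
; against zeroinitializer (the {z} modifier). The backend picks among the
; 132/213/231 instruction forms so that the destination register is the
; operand whose values the masked-off lanes must preserve.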
define <8 x double> @test_mm512_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsub_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xaa,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9a,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9a,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xaa,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xaa,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}
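; There is no separate fmsub/fnmadd/fnmsub intrinsic at this level: those
; forms are expressed as llvm.x86.avx512.vfmadd.pd.512 with the relevant
; operand negated by an fsub from a <-0.0, ...> splat, and the backend folds
; the negation into vfmsub/vfnmadd/vfnmsub, as the checks below verify.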
define <8 x double> @test_mm512_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xac,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask3_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbc,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbc,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fnmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xac,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xac,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_maskz_fnmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xae,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xae,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}
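; The tests below cover the forms without explicit rounding, which use the
; target-independent llvm.fma.v8f64 intrinsic rather than the AVX512-specific
; one. For these, update_llc_test_checks also captures the operand comments
; such as "zmm0 = (zmm1 * zmm0) + zmm2" printed by the assembly writer.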
define <8 x double> @test_mm512_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmadd_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X86-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X64-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsub_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xaa,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9a,0xc1]
; X86-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9a,0xc1]
; X64-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xaa,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xaa,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask3_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbc,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbc,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fnmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xac,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xac,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %sub1.i) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_maskz_fnmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xae,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xae,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %sub1.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}
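; The <16 x float> tests below repeat the same coverage with i16 masks. With
; 16 lanes the whole mask register is used, so the X86 side loads the mask
; straight off the stack with kmovw instead of going through movzbl.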
define <16 x float> @test_mm512_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmadd_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  ret <16 x float> %0
}

declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) #1

define <16 x float> @test_mm512_mask_fmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb8,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb8,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa8,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa8,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsub_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9a,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9a,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xaa,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xaa,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xac,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask3_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbc,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbc,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fnmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xac,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xac,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %sub1, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_maskz_fnmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xae,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xae,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %sub1, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}
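; As with the pd tests, the remaining ps tests switch from the rounding
; intrinsic to the generic llvm.fma.v16f32 for the default-rounding forms.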
define <16 x float> @test_mm512_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmadd_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X86-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X64-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsub_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9a,0xc1]
; X86-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9a,0xc1]
; X64-NEXT:    ## zmm0 {%k1} = (zmm0 * zmm1) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xaa,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xaa,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask3_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbc,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbc,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fnmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xac,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xac,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %sub1.i) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_maskz_fnmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xae,0xc2]
; X86-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xae,0xc2]
; X64-NEXT:    ## zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %sub1.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}
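; vfmaddsub alternates the final operation across lanes in the same way as
; addsubpd: even-indexed elements compute a*b - c and odd-indexed elements
; compute a*b + c.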
define <8 x double> @test_mm512_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa6,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  ret <8 x double> %0
}

declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) #1

define <8 x double> @test_mm512_mask_fmaddsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x96,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x96,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb6,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb6,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmaddsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa6,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa6,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}
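; fmsubadd is the opposite pairing (even lanes add, odd lanes subtract); it
; has no intrinsic of its own here and is modeled as fmaddsub with %__C
; negated, which the backend folds into vfmsubadd.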
; X64-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa6,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_round_pd:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa7,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmsubadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x97,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x97,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmsubadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa7,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa7,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

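; NOTE: In the non-rounding tests that follow, fmaddsub/fmsubadd are expressed
; in generic IR as two @llvm.fma calls (one with %__C, one with %__C negated
; via 'fsub -0.0, %__C') blended by a shufflevector; the backend is expected
; to fold this pattern back into a single vfmaddsub/vfmsubadd instruction.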
define <8 x double> @test_mm512_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_pd:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %3
}

define <8 x double> @test_mm512_mask_fmaddsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X86-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X64-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__A
  ret <8 x double> %5
}

define <8 x double> @test_mm512_mask3_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
; X86-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
; X64-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C
  ret <8 x double> %5
}

define <8 x double> @test_mm512_maskz_fmaddsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
; X86-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
; X64-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> zeroinitializer
  ret <8 x double> %5
}

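; NOTE: For fmsubadd the shufflevector operands are swapped relative to
; fmaddsub: fmaddsub produces (a*b)-c in the even lanes and (a*b)+c in the
; odd lanes, while fmsubadd produces (a*b)+c in the even lanes and (a*b)-c in
; the odd lanes, matching the '+/-' and '-/+' notation in the expected
; assembly comments.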
define <8 x double> @test_mm512_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_pd:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa7,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask_fmsubadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x97,0xc1]
; X86-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x97,0xc1]
; X64-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__A
  ret <8 x double> %4
}

define <8 x double> @test_mm512_maskz_fmsubadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa7,0xc2]
; X86-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa7,0xc2]
; X64-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %4
}

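; NOTE: The same rounding-mode coverage is repeated below for the
; <16 x float> forms, which use @llvm.x86.avx512.vfmaddsub.ps.512 and an i16
; mask (one bit per lane for 16 lanes) instead of the i8 mask used by the pd
; variants.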
define <16 x float> @test_mm512_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_round_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa6,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  ret <16 x float> %0
}

declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) #1

define <16 x float> @test_mm512_mask_fmaddsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x96,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x96,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb6,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb6,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmaddsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa6,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa6,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_round_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa7,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmsubadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x97,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x97,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmsubadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa7,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa7,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

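; NOTE: The non-rounding fmaddsub_ps/fmsubadd_ps tests below mirror the pd
; pattern: two @llvm.fma.v16f32 calls blended with a 16-lane shufflevector
; mask <0, 17, 2, 19, ...> that takes even output lanes from its first
; operand and odd output lanes from its second.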
define <16 x float> @test_mm512_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_fmaddsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X86-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X64-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__A
  ret <16 x float> %5
}

define <16 x float> @test_mm512_mask3_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
; X86-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
; X64-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C
  ret <16 x float> %5
}

define <16 x float> @test_mm512_maskz_fmaddsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
; X86-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
; X64-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> zeroinitializer
  ret <16 x float> %5
}

define <16 x float> @test_mm512_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa7,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x97,0xc1]
; X86-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x97,0xc1]
; X64-NEXT: ## zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__A
  ret <16 x float> %4
}

define <16 x float> @test_mm512_maskz_fmsubadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa7,0xc2]
; X86-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa7,0xc2]
; X64-NEXT: ## zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
  ret <16 x float> %4
}

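; NOTE: fmsub is modeled as an fma (or vfmadd intrinsic) with the addend
; negated via 'fsub -0.0, %__C'. The mask3 variants write into the addend
; register, so unselected lanes keep %__C and the result is copied out of
; zmm2 with a vmovapd/vmovaps.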
define <8 x double> @test_mm512_mask3_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xba,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xba,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
; X86-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
; X64-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <16 x float> @test_mm512_mask3_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xba,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xba,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
; X86-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
; X64-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

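; NOTE: The mask3 fmsubadd tests combine the two patterns above: the rounding
; form feeds a negated %__C into @llvm.x86.avx512.vfmaddsub.*.512, while the
; generic form uses the two-fma-plus-shufflevector expansion; in both cases
; %__C is selected for the masked-off lanes.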
define <8 x double> @test_mm512_mask3_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb7,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb7,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
; X86-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
; X64-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__C
  ret <8 x double> %4
}

define <16 x float> @test_mm512_mask3_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb7,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb7,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
; X86-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
; X64-NEXT: ## zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__C
  ret <16 x float> %4
}

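; NOTE: fnmadd negates the product rather than the addend: the IR negates one
; multiplicand ('fsub -0.0, %__A') before the fma, giving -(a*b) + c. The
; masked forms keep %__A in the disabled lanes, which is why the 132 form of
; vfnmadd (overwriting the multiplicand operand) appears in the expected
; output.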
define <8 x double> @test_mm512_mask_fnmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9c,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask_fnmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X86-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X64-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <16 x float> @test_mm512_mask_fnmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9c,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask_fnmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X86-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X64-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

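; NOTE: fnmsub negates both the product and the addend: the IR negates one
; multiplicand and %__C via 'fsub -0.0, ...' before the multiply-add, giving
; -(a*b) - c, as the '## zmm0 = -(zmm0 * zmm1) - zmm2' assembly comments
; indicate.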
define <8 x double> @test_mm512_mask_fnmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %sub1, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbe,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbe,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %sub1, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask_fnmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X86-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X64-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub2.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub2.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
; X86-NEXT: ## zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
; X64-NEXT: ## zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub2.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub2.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <16 x float> @test_mm512_mask_fnmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
float> %0, <16 x float> %__A 1837 ret <16 x float> %2 1838} 1839 1840define <16 x float> @test_mm512_mask3_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { 1841; X86-LABEL: test_mm512_mask3_fnmsub_round_ps: 1842; X86: ## %bb.0: ## %entry 1843; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1844; X86-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbe,0xd1] 1845; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1846; X86-NEXT: retl ## encoding: [0xc3] 1847; 1848; X64-LABEL: test_mm512_mask3_fnmsub_round_ps: 1849; X64: ## %bb.0: ## %entry 1850; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1851; X64-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbe,0xd1] 1852; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1853; X64-NEXT: retq ## encoding: [0xc3] 1854entry: 1855 %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 1856 %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C 1857 %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8) 1858 %1 = bitcast i16 %__U to <16 x i1> 1859 %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C 1860 ret <16 x float> %2 1861} 1862 1863define <16 x float> @test_mm512_mask_fnmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { 1864; X86-LABEL: test_mm512_mask_fnmsub_ps: 1865; X86: ## %bb.0: ## %entry 1866; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1867; X86-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1] 1868; X86-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) - zmm2 1869; X86-NEXT: retl ## encoding: [0xc3] 1870; 1871; X64-LABEL: test_mm512_mask_fnmsub_ps: 1872; X64: ## %bb.0: ## %entry 1873; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1874; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1] 1875; X64-NEXT: ## zmm0 {%k1} = -(zmm0 * zmm1) - zmm2 1876; X64-NEXT: retq ## encoding: [0xc3] 1877entry: 1878 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 1879 %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float 
-0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C 1880 %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #10 1881 %1 = bitcast i16 %__U to <16 x i1> 1882 %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A 1883 ret <16 x float> %2 1884} 1885 1886define <16 x float> @test_mm512_mask3_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { 1887; X86-LABEL: test_mm512_mask3_fnmsub_ps: 1888; X86: ## %bb.0: ## %entry 1889; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1890; X86-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1] 1891; X86-NEXT: ## zmm2 {%k1} = -(zmm0 * zmm1) - zmm2 1892; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1893; X86-NEXT: retl ## encoding: [0xc3] 1894; 1895; X64-LABEL: test_mm512_mask3_fnmsub_ps: 1896; X64: ## %bb.0: ## %entry 1897; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1898; X64-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1] 1899; X64-NEXT: ## zmm2 {%k1} = -(zmm0 * zmm1) - zmm2 1900; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1901; X64-NEXT: retq ## encoding: [0xc3] 1902entry: 1903 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 1904 %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C 1905 %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #10 1906 %1 = bitcast i16 %__U to <16 x i1> 1907 %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C 1908 ret <16 x float> %2 1909} 1910 1911define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 1912; X86-LABEL: test_mm_mask_fmadd_ss: 1913; X86: ## %bb.0: ## %entry 1914; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 1915; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1916; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2] 1917; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 1918; X86-NEXT: retl ## encoding: [0xc3] 1919; 1920; X64-LABEL: test_mm_mask_fmadd_ss: 1921; X64: ## %bb.0: ## %entry 1922; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1923; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2] 1924; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 1925; X64-NEXT: retq ## encoding: [0xc3] 1926entry: 1927 %0 = extractelement <4 x float> %__W, i64 0 1928 %1 = extractelement <4 x float> %__A, i64 0 1929 %2 = extractelement <4 x float> %__B, i64 0 1930 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 1931 %4 = and i8 %__U, 1 1932 %tobool.i = icmp eq i8 %4, 0 1933 %vecext1.i = extractelement <4 x float> %__W, i32 0 
1934 %cond.i = select i1 %tobool.i, float %vecext1.i, float %3 1935 %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0 1936 ret <4 x float> %vecins.i 1937} 1938 1939define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 1940; X86-LABEL: test_mm_mask_fmadd_round_ss: 1941; X86: ## %bb.0: ## %entry 1942; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 1943; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1944; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2] 1945; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 1946; X86-NEXT: retl ## encoding: [0xc3] 1947; 1948; X64-LABEL: test_mm_mask_fmadd_round_ss: 1949; X64: ## %bb.0: ## %entry 1950; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1951; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2] 1952; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 1953; X64-NEXT: retq ## encoding: [0xc3] 1954entry: 1955 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %__A, <4 x float> %__B, i8 %__U, i32 4) 1956 ret <4 x float> %0 1957} 1958 1959declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1 1960 1961define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 1962; X86-LABEL: test_mm_maskz_fmadd_ss: 1963; X86: ## %bb.0: ## %entry 1964; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 1965; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1966; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] 1967; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 1968; X86-NEXT: retl ## encoding: [0xc3] 1969; 1970; X64-LABEL: test_mm_maskz_fmadd_ss: 1971; X64: ## %bb.0: ## %entry 1972; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1973; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] 1974; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 1975; X64-NEXT: retq ## encoding: [0xc3] 1976entry: 1977 %0 = extractelement <4 x float> %__A, i64 0 1978 %1 = extractelement <4 x float> %__B, i64 0 1979 %2 = extractelement <4 x float> %__C, i64 0 1980 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 1981 %4 = and i8 %__U, 1 1982 %tobool.i = icmp eq i8 %4, 0 1983 %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3 1984 %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 1985 ret <4 x float> %vecins.i 1986} 1987 1988define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 1989; X86-LABEL: test_mm_maskz_fmadd_round_ss: 1990; X86: ## %bb.0: ## %entry 1991; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 1992; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1993; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] 1994; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 1995; X86-NEXT: retl ## encoding: [0xc3] 1996; 1997; X64-LABEL: test_mm_maskz_fmadd_round_ss: 1998; X64: ## %bb.0: ## %entry 1999; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2000; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] 2001; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * 
xmm0) + xmm2 2002; X64-NEXT: retq ## encoding: [0xc3] 2003entry: 2004 %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 %__U, i32 4) 2005 ret <4 x float> %0 2006} 2007 2008declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1 2009 2010define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2011; X86-LABEL: test_mm_mask3_fmadd_ss: 2012; X86: ## %bb.0: ## %entry 2013; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2014; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2015; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1] 2016; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2017; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2018; X86-NEXT: retl ## encoding: [0xc3] 2019; 2020; X64-LABEL: test_mm_mask3_fmadd_ss: 2021; X64: ## %bb.0: ## %entry 2022; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2023; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1] 2024; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2025; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2026; X64-NEXT: retq ## encoding: [0xc3] 2027entry: 2028 %0 = extractelement <4 x float> %__W, i64 0 2029 %1 = extractelement <4 x float> %__X, i64 0 2030 %2 = extractelement <4 x float> %__Y, i64 0 2031 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2032 %4 = and i8 %__U, 1 2033 %tobool.i = icmp eq i8 %4, 0 2034 %vecext1.i = extractelement <4 x float> %__Y, i32 0 2035 %cond.i = select i1 %tobool.i, float %vecext1.i, float %3 2036 %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0 2037 ret <4 x float> %vecins.i 2038} 2039 2040define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2041; X86-LABEL: test_mm_mask3_fmadd_round_ss: 2042; X86: ## %bb.0: ## %entry 2043; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2044; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2045; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1] 2046; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2047; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2048; X86-NEXT: retl ## encoding: [0xc3] 2049; 2050; X64-LABEL: test_mm_mask3_fmadd_round_ss: 2051; X64: ## %bb.0: ## %entry 2052; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2053; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1] 2054; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2055; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2056; X64-NEXT: retq ## encoding: [0xc3] 2057entry: 2058 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 %__U, i32 4) 2059 ret <4 x float> %0 2060} 2061 2062declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1 2063 2064define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 2065; X86-LABEL: test_mm_mask_fmsub_ss: 2066; X86: ## %bb.0: ## %entry 2067; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2068; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2069; 
X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2] 2070; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2071; X86-NEXT: retl ## encoding: [0xc3] 2072; 2073; X64-LABEL: test_mm_mask_fmsub_ss: 2074; X64: ## %bb.0: ## %entry 2075; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2076; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2] 2077; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2078; X64-NEXT: retq ## encoding: [0xc3] 2079entry: 2080 %0 = extractelement <4 x float> %__W, i64 0 2081 %1 = extractelement <4 x float> %__A, i64 0 2082 %.rhs.i = extractelement <4 x float> %__B, i64 0 2083 %2 = fsub float -0.000000e+00, %.rhs.i 2084 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2085 %4 = and i8 %__U, 1 2086 %tobool.i = icmp eq i8 %4, 0 2087 %vecext1.i = extractelement <4 x float> %__W, i32 0 2088 %cond.i = select i1 %tobool.i, float %vecext1.i, float %3 2089 %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0 2090 ret <4 x float> %vecins.i 2091} 2092 2093define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 2094; X86-LABEL: test_mm_mask_fmsub_round_ss: 2095; X86: ## %bb.0: ## %entry 2096; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2097; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2098; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2] 2099; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2100; X86-NEXT: retl ## encoding: [0xc3] 2101; 2102; X64-LABEL: test_mm_mask_fmsub_round_ss: 2103; X64: ## %bb.0: ## %entry 2104; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2105; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2] 2106; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2107; X64-NEXT: retq ## encoding: [0xc3] 2108entry: 2109 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 2110 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %__A, <4 x float> %sub, i8 %__U, i32 4) 2111 ret <4 x float> %0 2112} 2113 2114define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 2115; X86-LABEL: test_mm_maskz_fmsub_ss: 2116; X86: ## %bb.0: ## %entry 2117; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2118; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2119; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2] 2120; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2121; X86-NEXT: retl ## encoding: [0xc3] 2122; 2123; X64-LABEL: test_mm_maskz_fmsub_ss: 2124; X64: ## %bb.0: ## %entry 2125; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2126; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2] 2127; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2128; X64-NEXT: retq ## encoding: [0xc3] 2129entry: 2130 %0 = extractelement <4 x float> %__A, i64 0 2131 %1 = extractelement <4 x float> %__B, i64 0 2132 %.rhs.i = extractelement <4 x float> %__C, i64 0 2133 %2 = fsub float -0.000000e+00, %.rhs.i 2134 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2135 %4 = and i8 %__U, 1 2136 %tobool.i = icmp eq i8 %4, 0 2137 %cond.i = select i1 %tobool.i, float 
0.000000e+00, float %3 2138 %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 2139 ret <4 x float> %vecins.i 2140} 2141 2142define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 2143; X86-LABEL: test_mm_maskz_fmsub_round_ss: 2144; X86: ## %bb.0: ## %entry 2145; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2146; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2147; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2] 2148; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2149; X86-NEXT: retl ## encoding: [0xc3] 2150; 2151; X64-LABEL: test_mm_maskz_fmsub_round_ss: 2152; X64: ## %bb.0: ## %entry 2153; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2154; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2] 2155; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2156; X64-NEXT: retq ## encoding: [0xc3] 2157entry: 2158 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C 2159 %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub, i8 %__U, i32 4) 2160 ret <4 x float> %0 2161} 2162 2163define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2164; X86-LABEL: test_mm_mask3_fmsub_ss: 2165; X86: ## %bb.0: ## %entry 2166; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2167; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2168; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] 2169; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2170; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2171; X86-NEXT: retl ## encoding: [0xc3] 2172; 2173; X64-LABEL: test_mm_mask3_fmsub_ss: 2174; X64: ## %bb.0: ## %entry 2175; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2176; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] 2177; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2178; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2179; X64-NEXT: retq ## encoding: [0xc3] 2180entry: 2181 %0 = extractelement <4 x float> %__W, i64 0 2182 %1 = extractelement <4 x float> %__X, i64 0 2183 %.rhs.i = extractelement <4 x float> %__Y, i64 0 2184 %2 = fsub float -0.000000e+00, %.rhs.i 2185 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2186 %4 = and i8 %__U, 1 2187 %tobool.i = icmp eq i8 %4, 0 2188 %vecext1.i = extractelement <4 x float> %__Y, i32 0 2189 %cond.i = select i1 %tobool.i, float %vecext1.i, float %3 2190 %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0 2191 ret <4 x float> %vecins.i 2192} 2193 2194define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2195; X86-LABEL: test_mm_mask3_fmsub_round_ss: 2196; X86: ## %bb.0: ## %entry 2197; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2198; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2199; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] 2200; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2201; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2202; X86-NEXT: retl ## encoding: [0xc3] 2203; 2204; 
X64-LABEL: test_mm_mask3_fmsub_round_ss: 2205; X64: ## %bb.0: ## %entry 2206; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2207; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] 2208; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2209; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2210; X64-NEXT: retq ## encoding: [0xc3] 2211entry: 2212 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 %__U, i32 4) 2213 ret <4 x float> %0 2214} 2215 2216declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1 2217 2218define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 2219; X86-LABEL: test_mm_mask_fnmadd_ss: 2220; X86: ## %bb.0: ## %entry 2221; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2222; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2223; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2] 2224; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2225; X86-NEXT: retl ## encoding: [0xc3] 2226; 2227; X64-LABEL: test_mm_mask_fnmadd_ss: 2228; X64: ## %bb.0: ## %entry 2229; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2230; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2] 2231; X64-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2232; X64-NEXT: retq ## encoding: [0xc3] 2233entry: 2234 %0 = extractelement <4 x float> %__W, i64 0 2235 %.rhs.i = extractelement <4 x float> %__A, i64 0 2236 %1 = fsub float -0.000000e+00, %.rhs.i 2237 %2 = extractelement <4 x float> %__B, i64 0 2238 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2239 %4 = and i8 %__U, 1 2240 %tobool.i = icmp eq i8 %4, 0 2241 %vecext1.i = extractelement <4 x float> %__W, i32 0 2242 %cond.i = select i1 %tobool.i, float %vecext1.i, float %3 2243 %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0 2244 ret <4 x float> %vecins.i 2245} 2246 2247define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 2248; X86-LABEL: test_mm_mask_fnmadd_round_ss: 2249; X86: ## %bb.0: ## %entry 2250; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2251; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2252; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2] 2253; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2254; X86-NEXT: retl ## encoding: [0xc3] 2255; 2256; X64-LABEL: test_mm_mask_fnmadd_round_ss: 2257; X64: ## %bb.0: ## %entry 2258; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2259; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2] 2260; X64-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2261; X64-NEXT: retq ## encoding: [0xc3] 2262entry: 2263 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A 2264 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %__B, i8 %__U, i32 4) 2265 ret <4 x float> %0 2266} 2267 2268define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 2269; X86-LABEL: test_mm_maskz_fnmadd_ss: 2270; X86: ## %bb.0: ## %entry 2271; X86-NEXT: movb 
{{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2272; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2273; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2] 2274; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2275; X86-NEXT: retl ## encoding: [0xc3] 2276; 2277; X64-LABEL: test_mm_maskz_fnmadd_ss: 2278; X64: ## %bb.0: ## %entry 2279; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2280; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2] 2281; X64-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2282; X64-NEXT: retq ## encoding: [0xc3] 2283entry: 2284 %0 = extractelement <4 x float> %__A, i64 0 2285 %.rhs.i = extractelement <4 x float> %__B, i64 0 2286 %1 = fsub float -0.000000e+00, %.rhs.i 2287 %2 = extractelement <4 x float> %__C, i64 0 2288 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2289 %4 = and i8 %__U, 1 2290 %tobool.i = icmp eq i8 %4, 0 2291 %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3 2292 %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 2293 ret <4 x float> %vecins.i 2294} 2295 2296define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 2297; X86-LABEL: test_mm_maskz_fnmadd_round_ss: 2298; X86: ## %bb.0: ## %entry 2299; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2300; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2301; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2] 2302; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2303; X86-NEXT: retl ## encoding: [0xc3] 2304; 2305; X64-LABEL: test_mm_maskz_fnmadd_round_ss: 2306; X64: ## %bb.0: ## %entry 2307; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2308; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2] 2309; X64-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2310; X64-NEXT: retq ## encoding: [0xc3] 2311entry: 2312 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 2313 %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %sub, <4 x float> %__C, i8 %__U, i32 4) 2314 ret <4 x float> %0 2315} 2316 2317define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2318; X86-LABEL: test_mm_mask3_fnmadd_ss: 2319; X86: ## %bb.0: ## %entry 2320; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2321; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2322; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1] 2323; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 2324; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2325; X86-NEXT: retl ## encoding: [0xc3] 2326; 2327; X64-LABEL: test_mm_mask3_fnmadd_ss: 2328; X64: ## %bb.0: ## %entry 2329; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2330; X64-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1] 2331; X64-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 2332; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2333; X64-NEXT: retq ## encoding: [0xc3] 2334entry: 2335 %0 = extractelement <4 x float> %__W, i64 0 2336 %.rhs.i = extractelement <4 x float> %__X, i64 0 2337 %1 = fsub 
float -0.000000e+00, %.rhs.i 2338 %2 = extractelement <4 x float> %__Y, i64 0 2339 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2340 %4 = and i8 %__U, 1 2341 %tobool.i = icmp eq i8 %4, 0 2342 %vecext1.i = extractelement <4 x float> %__Y, i32 0 2343 %cond.i = select i1 %tobool.i, float %vecext1.i, float %3 2344 %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0 2345 ret <4 x float> %vecins.i 2346} 2347 2348define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2349; X86-LABEL: test_mm_mask3_fnmadd_round_ss: 2350; X86: ## %bb.0: ## %entry 2351; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2352; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2353; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1] 2354; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 2355; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2356; X86-NEXT: retl ## encoding: [0xc3] 2357; 2358; X64-LABEL: test_mm_mask3_fnmadd_round_ss: 2359; X64: ## %bb.0: ## %entry 2360; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2361; X64-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1] 2362; X64-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 2363; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2364; X64-NEXT: retq ## encoding: [0xc3] 2365entry: 2366 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__X 2367 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %__Y, i8 %__U, i32 4) 2368 ret <4 x float> %0 2369} 2370 2371define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 2372; X86-LABEL: test_mm_mask_fnmsub_ss: 2373; X86: ## %bb.0: ## %entry 2374; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2375; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2376; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2] 2377; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 2378; X86-NEXT: retl ## encoding: [0xc3] 2379; 2380; X64-LABEL: test_mm_mask_fnmsub_ss: 2381; X64: ## %bb.0: ## %entry 2382; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2383; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2] 2384; X64-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 2385; X64-NEXT: retq ## encoding: [0xc3] 2386entry: 2387 %0 = extractelement <4 x float> %__W, i64 0 2388 %.rhs.i = extractelement <4 x float> %__A, i64 0 2389 %1 = fsub float -0.000000e+00, %.rhs.i 2390 %.rhs7.i = extractelement <4 x float> %__B, i64 0 2391 %2 = fsub float -0.000000e+00, %.rhs7.i 2392 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2393 %4 = and i8 %__U, 1 2394 %tobool.i = icmp eq i8 %4, 0 2395 %vecext2.i = extractelement <4 x float> %__W, i32 0 2396 %cond.i = select i1 %tobool.i, float %vecext2.i, float %3 2397 %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0 2398 ret <4 x float> %vecins.i 2399} 2400 2401define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { 2402; X86-LABEL: test_mm_mask_fnmsub_round_ss: 2403; X86: ## %bb.0: ## %entry 2404; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: 
[0x8a,0x44,0x24,0x04] 2405; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2406; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2] 2407; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 2408; X86-NEXT: retl ## encoding: [0xc3] 2409; 2410; X64-LABEL: test_mm_mask_fnmsub_round_ss: 2411; X64: ## %bb.0: ## %entry 2412; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2413; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2] 2414; X64-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 2415; X64-NEXT: retq ## encoding: [0xc3] 2416entry: 2417 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A 2418 %sub1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 2419 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %sub1, i8 %__U, i32 4) 2420 ret <4 x float> %0 2421} 2422 2423define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 2424; X86-LABEL: test_mm_maskz_fnmsub_ss: 2425; X86: ## %bb.0: ## %entry 2426; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2427; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2428; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2] 2429; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 2430; X86-NEXT: retl ## encoding: [0xc3] 2431; 2432; X64-LABEL: test_mm_maskz_fnmsub_ss: 2433; X64: ## %bb.0: ## %entry 2434; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2435; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2] 2436; X64-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 2437; X64-NEXT: retq ## encoding: [0xc3] 2438entry: 2439 %0 = extractelement <4 x float> %__A, i64 0 2440 %.rhs.i = extractelement <4 x float> %__B, i64 0 2441 %1 = fsub float -0.000000e+00, %.rhs.i 2442 %.rhs5.i = extractelement <4 x float> %__C, i64 0 2443 %2 = fsub float -0.000000e+00, %.rhs5.i 2444 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2445 %4 = and i8 %__U, 1 2446 %tobool.i = icmp eq i8 %4, 0 2447 %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3 2448 %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 2449 ret <4 x float> %vecins.i 2450} 2451 2452define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { 2453; X86-LABEL: test_mm_maskz_fnmsub_round_ss: 2454; X86: ## %bb.0: ## %entry 2455; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2456; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2457; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2] 2458; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 2459; X86-NEXT: retl ## encoding: [0xc3] 2460; 2461; X64-LABEL: test_mm_maskz_fnmsub_round_ss: 2462; X64: ## %bb.0: ## %entry 2463; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2464; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2] 2465; X64-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 2466; X64-NEXT: retq ## encoding: [0xc3] 2467entry: 2468 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B 
2469 %sub1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C 2470 %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %sub, <4 x float> %sub1, i8 %__U, i32 4) 2471 ret <4 x float> %0 2472} 2473 2474define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2475; X86-LABEL: test_mm_mask3_fnmsub_ss: 2476; X86: ## %bb.0: ## %entry 2477; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2478; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2479; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] 2480; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 2481; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2482; X86-NEXT: retl ## encoding: [0xc3] 2483; 2484; X64-LABEL: test_mm_mask3_fnmsub_ss: 2485; X64: ## %bb.0: ## %entry 2486; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2487; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] 2488; X64-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 2489; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2490; X64-NEXT: retq ## encoding: [0xc3] 2491entry: 2492 %0 = extractelement <4 x float> %__W, i64 0 2493 %.rhs.i = extractelement <4 x float> %__X, i64 0 2494 %1 = fsub float -0.000000e+00, %.rhs.i 2495 %.rhs7.i = extractelement <4 x float> %__Y, i64 0 2496 %2 = fsub float -0.000000e+00, %.rhs7.i 2497 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10 2498 %4 = and i8 %__U, 1 2499 %tobool.i = icmp eq i8 %4, 0 2500 %vecext2.i = extractelement <4 x float> %__Y, i32 0 2501 %cond.i = select i1 %tobool.i, float %vecext2.i, float %3 2502 %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0 2503 ret <4 x float> %vecins.i 2504} 2505 2506define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { 2507; X86-LABEL: test_mm_mask3_fnmsub_round_ss: 2508; X86: ## %bb.0: ## %entry 2509; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2510; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2511; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] 2512; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 2513; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2514; X86-NEXT: retl ## encoding: [0xc3] 2515; 2516; X64-LABEL: test_mm_mask3_fnmsub_round_ss: 2517; X64: ## %bb.0: ## %entry 2518; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2519; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] 2520; X64-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 2521; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] 2522; X64-NEXT: retq ## encoding: [0xc3] 2523entry: 2524 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__X 2525 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %__Y, i8 %__U, i32 4) 2526 ret <4 x float> %0 2527} 2528 2529define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { 2530; X86-LABEL: test_mm_mask_fmadd_sd: 2531; X86: ## %bb.0: ## %entry 2532; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2533; 
X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2534; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2] 2535; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 2536; X86-NEXT: retl ## encoding: [0xc3] 2537; 2538; X64-LABEL: test_mm_mask_fmadd_sd: 2539; X64: ## %bb.0: ## %entry 2540; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2541; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2] 2542; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 2543; X64-NEXT: retq ## encoding: [0xc3] 2544entry: 2545 %0 = extractelement <2 x double> %__W, i64 0 2546 %1 = extractelement <2 x double> %__A, i64 0 2547 %2 = extractelement <2 x double> %__B, i64 0 2548 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2549 %4 = and i8 %__U, 1 2550 %tobool.i = icmp eq i8 %4, 0 2551 %vecext1.i = extractelement <2 x double> %__W, i32 0 2552 %cond.i = select i1 %tobool.i, double %vecext1.i, double %3 2553 %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0 2554 ret <2 x double> %vecins.i 2555} 2556 2557define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { 2558; X86-LABEL: test_mm_mask_fmadd_round_sd: 2559; X86: ## %bb.0: ## %entry 2560; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2561; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2562; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2] 2563; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 2564; X86-NEXT: retl ## encoding: [0xc3] 2565; 2566; X64-LABEL: test_mm_mask_fmadd_round_sd: 2567; X64: ## %bb.0: ## %entry 2568; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2569; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2] 2570; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 2571; X64-NEXT: retq ## encoding: [0xc3] 2572entry: 2573 %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %__A, <2 x double> %__B, i8 %__U, i32 4) 2574 ret <2 x double> %0 2575} 2576 2577declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1 2578 2579define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { 2580; X86-LABEL: test_mm_maskz_fmadd_sd: 2581; X86: ## %bb.0: ## %entry 2582; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2583; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2584; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2] 2585; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 2586; X86-NEXT: retl ## encoding: [0xc3] 2587; 2588; X64-LABEL: test_mm_maskz_fmadd_sd: 2589; X64: ## %bb.0: ## %entry 2590; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2591; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2] 2592; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 2593; X64-NEXT: retq ## encoding: [0xc3] 2594entry: 2595 %0 = extractelement <2 x double> %__A, i64 0 2596 %1 = extractelement <2 x double> %__B, i64 0 2597 %2 = extractelement <2 x double> %__C, i64 0 2598 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2599 %4 = and i8 %__U, 1 2600 %tobool.i = icmp eq i8 %4, 0 2601 %cond.i = select i1 %tobool.i, double 
0.000000e+00, double %3 2602 %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 2603 ret <2 x double> %vecins.i 2604} 2605 2606define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { 2607; X86-LABEL: test_mm_maskz_fmadd_round_sd: 2608; X86: ## %bb.0: ## %entry 2609; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2610; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2611; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2] 2612; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 2613; X86-NEXT: retl ## encoding: [0xc3] 2614; 2615; X64-LABEL: test_mm_maskz_fmadd_round_sd: 2616; X64: ## %bb.0: ## %entry 2617; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2618; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2] 2619; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 2620; X64-NEXT: retq ## encoding: [0xc3] 2621entry: 2622 %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 %__U, i32 4) 2623 ret <2 x double> %0 2624} 2625 2626declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1 2627 2628define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { 2629; X86-LABEL: test_mm_mask3_fmadd_sd: 2630; X86: ## %bb.0: ## %entry 2631; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2632; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2633; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1] 2634; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2635; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2636; X86-NEXT: retl ## encoding: [0xc3] 2637; 2638; X64-LABEL: test_mm_mask3_fmadd_sd: 2639; X64: ## %bb.0: ## %entry 2640; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2641; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1] 2642; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2643; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2644; X64-NEXT: retq ## encoding: [0xc3] 2645entry: 2646 %0 = extractelement <2 x double> %__W, i64 0 2647 %1 = extractelement <2 x double> %__X, i64 0 2648 %2 = extractelement <2 x double> %__Y, i64 0 2649 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2650 %4 = and i8 %__U, 1 2651 %tobool.i = icmp eq i8 %4, 0 2652 %vecext1.i = extractelement <2 x double> %__Y, i32 0 2653 %cond.i = select i1 %tobool.i, double %vecext1.i, double %3 2654 %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0 2655 ret <2 x double> %vecins.i 2656} 2657 2658define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { 2659; X86-LABEL: test_mm_mask3_fmadd_round_sd: 2660; X86: ## %bb.0: ## %entry 2661; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2662; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2663; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1] 2664; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2665; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2666; X86-NEXT: retl ## encoding: [0xc3] 2667; 2668; X64-LABEL: 
test_mm_mask3_fmadd_round_sd: 2669; X64: ## %bb.0: ## %entry 2670; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2671; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1] 2672; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 2673; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2674; X64-NEXT: retq ## encoding: [0xc3] 2675entry: 2676 %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 %__U, i32 4) 2677 ret <2 x double> %0 2678} 2679 2680declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1 2681 2682define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { 2683; X86-LABEL: test_mm_mask_fmsub_sd: 2684; X86: ## %bb.0: ## %entry 2685; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2686; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2687; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2] 2688; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2689; X86-NEXT: retl ## encoding: [0xc3] 2690; 2691; X64-LABEL: test_mm_mask_fmsub_sd: 2692; X64: ## %bb.0: ## %entry 2693; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2694; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2] 2695; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2696; X64-NEXT: retq ## encoding: [0xc3] 2697entry: 2698 %0 = extractelement <2 x double> %__W, i64 0 2699 %1 = extractelement <2 x double> %__A, i64 0 2700 %.rhs.i = extractelement <2 x double> %__B, i64 0 2701 %2 = fsub double -0.000000e+00, %.rhs.i 2702 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2703 %4 = and i8 %__U, 1 2704 %tobool.i = icmp eq i8 %4, 0 2705 %vecext1.i = extractelement <2 x double> %__W, i32 0 2706 %cond.i = select i1 %tobool.i, double %vecext1.i, double %3 2707 %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0 2708 ret <2 x double> %vecins.i 2709} 2710 2711define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { 2712; X86-LABEL: test_mm_mask_fmsub_round_sd: 2713; X86: ## %bb.0: ## %entry 2714; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2715; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2716; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2] 2717; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2718; X86-NEXT: retl ## encoding: [0xc3] 2719; 2720; X64-LABEL: test_mm_mask_fmsub_round_sd: 2721; X64: ## %bb.0: ## %entry 2722; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2723; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2] 2724; X64-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 2725; X64-NEXT: retq ## encoding: [0xc3] 2726entry: 2727 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B 2728 %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %__A, <2 x double> %sub, i8 %__U, i32 4) 2729 ret <2 x double> %0 2730} 2731 2732define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { 2733; X86-LABEL: test_mm_maskz_fmsub_sd: 2734; X86: ## %bb.0: ## %entry 2735; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## 
encoding: [0x8a,0x44,0x24,0x04] 2736; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2737; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2] 2738; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2739; X86-NEXT: retl ## encoding: [0xc3] 2740; 2741; X64-LABEL: test_mm_maskz_fmsub_sd: 2742; X64: ## %bb.0: ## %entry 2743; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2744; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2] 2745; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2746; X64-NEXT: retq ## encoding: [0xc3] 2747entry: 2748 %0 = extractelement <2 x double> %__A, i64 0 2749 %1 = extractelement <2 x double> %__B, i64 0 2750 %.rhs.i = extractelement <2 x double> %__C, i64 0 2751 %2 = fsub double -0.000000e+00, %.rhs.i 2752 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2753 %4 = and i8 %__U, 1 2754 %tobool.i = icmp eq i8 %4, 0 2755 %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3 2756 %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 2757 ret <2 x double> %vecins.i 2758} 2759 2760define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { 2761; X86-LABEL: test_mm_maskz_fmsub_round_sd: 2762; X86: ## %bb.0: ## %entry 2763; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2764; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2765; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2] 2766; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2767; X86-NEXT: retl ## encoding: [0xc3] 2768; 2769; X64-LABEL: test_mm_maskz_fmsub_round_sd: 2770; X64: ## %bb.0: ## %entry 2771; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2772; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2] 2773; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 2774; X64-NEXT: retq ## encoding: [0xc3] 2775entry: 2776 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__C 2777 %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub, i8 %__U, i32 4) 2778 ret <2 x double> %0 2779} 2780 2781define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { 2782; X86-LABEL: test_mm_mask3_fmsub_sd: 2783; X86: ## %bb.0: ## %entry 2784; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2785; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2786; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] 2787; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2788; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2789; X86-NEXT: retl ## encoding: [0xc3] 2790; 2791; X64-LABEL: test_mm_mask3_fmsub_sd: 2792; X64: ## %bb.0: ## %entry 2793; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2794; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] 2795; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2796; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2797; X64-NEXT: retq ## encoding: [0xc3] 2798entry: 2799 %0 = extractelement <2 x double> %__W, i64 0 2800 %1 = extractelement <2 x double> %__X, i64 0 2801 %.rhs.i = extractelement <2 x double> %__Y, i64 0 2802 %2 = fsub double 
-0.000000e+00, %.rhs.i 2803 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2804 %4 = and i8 %__U, 1 2805 %tobool.i = icmp eq i8 %4, 0 2806 %vecext1.i = extractelement <2 x double> %__Y, i32 0 2807 %cond.i = select i1 %tobool.i, double %vecext1.i, double %3 2808 %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0 2809 ret <2 x double> %vecins.i 2810} 2811 2812define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { 2813; X86-LABEL: test_mm_mask3_fmsub_round_sd: 2814; X86: ## %bb.0: ## %entry 2815; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2816; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2817; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] 2818; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2819; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2820; X86-NEXT: retl ## encoding: [0xc3] 2821; 2822; X64-LABEL: test_mm_mask3_fmsub_round_sd: 2823; X64: ## %bb.0: ## %entry 2824; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2825; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] 2826; X64-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 2827; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] 2828; X64-NEXT: retq ## encoding: [0xc3] 2829entry: 2830 %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 %__U, i32 4) 2831 ret <2 x double> %0 2832} 2833 2834declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1 2835 2836define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { 2837; X86-LABEL: test_mm_mask_fnmadd_sd: 2838; X86: ## %bb.0: ## %entry 2839; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2840; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2841; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2] 2842; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2843; X86-NEXT: retl ## encoding: [0xc3] 2844; 2845; X64-LABEL: test_mm_mask_fnmadd_sd: 2846; X64: ## %bb.0: ## %entry 2847; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2848; X64-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2] 2849; X64-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2850; X64-NEXT: retq ## encoding: [0xc3] 2851entry: 2852 %0 = extractelement <2 x double> %__W, i64 0 2853 %.rhs.i = extractelement <2 x double> %__A, i64 0 2854 %1 = fsub double -0.000000e+00, %.rhs.i 2855 %2 = extractelement <2 x double> %__B, i64 0 2856 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2857 %4 = and i8 %__U, 1 2858 %tobool.i = icmp eq i8 %4, 0 2859 %vecext1.i = extractelement <2 x double> %__W, i32 0 2860 %cond.i = select i1 %tobool.i, double %vecext1.i, double %3 2861 %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0 2862 ret <2 x double> %vecins.i 2863} 2864 2865define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { 2866; X86-LABEL: test_mm_mask_fnmadd_round_sd: 2867; X86: ## %bb.0: ## %entry 2868; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2869; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 
2870; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2] 2871; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2872; X86-NEXT: retl ## encoding: [0xc3] 2873; 2874; X64-LABEL: test_mm_mask_fnmadd_round_sd: 2875; X64: ## %bb.0: ## %entry 2876; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2877; X64-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2] 2878; X64-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 2879; X64-NEXT: retq ## encoding: [0xc3] 2880entry: 2881 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__A 2882 %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %__B, i8 %__U, i32 4) 2883 ret <2 x double> %0 2884} 2885 2886define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { 2887; X86-LABEL: test_mm_maskz_fnmadd_sd: 2888; X86: ## %bb.0: ## %entry 2889; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2890; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2891; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2] 2892; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2893; X86-NEXT: retl ## encoding: [0xc3] 2894; 2895; X64-LABEL: test_mm_maskz_fnmadd_sd: 2896; X64: ## %bb.0: ## %entry 2897; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2898; X64-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2] 2899; X64-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2900; X64-NEXT: retq ## encoding: [0xc3] 2901entry: 2902 %0 = extractelement <2 x double> %__A, i64 0 2903 %.rhs.i = extractelement <2 x double> %__B, i64 0 2904 %1 = fsub double -0.000000e+00, %.rhs.i 2905 %2 = extractelement <2 x double> %__C, i64 0 2906 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10 2907 %4 = and i8 %__U, 1 2908 %tobool.i = icmp eq i8 %4, 0 2909 %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3 2910 %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 2911 ret <2 x double> %vecins.i 2912} 2913 2914define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { 2915; X86-LABEL: test_mm_maskz_fnmadd_round_sd: 2916; X86: ## %bb.0: ## %entry 2917; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 2918; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 2919; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2] 2920; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2921; X86-NEXT: retl ## encoding: [0xc3] 2922; 2923; X64-LABEL: test_mm_maskz_fnmadd_round_sd: 2924; X64: ## %bb.0: ## %entry 2925; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2926; X64-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2] 2927; X64-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 2928; X64-NEXT: retq ## encoding: [0xc3] 2929entry: 2930 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B 2931 %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %sub, <2 x double> %__C, i8 %__U, i32 4) 2932 ret <2 x double> %0 2933} 2934 2935define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { 2936; X86-LABEL: 
define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmadd_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X64-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__X, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %2 = extractelement <2 x double> %__Y, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__Y, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmadd_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X64-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__X
  %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %__Y, i8 %__U, i32 4)
  ret <2 x double> %0
}
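; For the fnmsub tests that follow, the IR negates both the multiplicand and
; the addend via fsub from -0.0; the backend should still fold the whole
; expression into a single masked vfnmsub213sd/vfnmsub231sd.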
define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmsub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X64-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__A, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs7.i = extractelement <2 x double> %__B, i64 0
  %2 = fsub double -0.000000e+00, %.rhs7.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext2.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext2.i, double %3
  %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmsub_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X64-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %sub1, i8 %__U, i32 4)
  ret <2 x double> %0
}

define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmsub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X64-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %.rhs.i = extractelement <2 x double> %__B, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs5.i = extractelement <2 x double> %__C, i64 0
  %2 = fsub double -0.000000e+00, %.rhs5.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmsub_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X64-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B
  %sub1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %sub, <2 x double> %sub1, i8 %__U, i32 4)
  ret <2 x double> %0
}

define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X64-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__X, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs7.i = extractelement <2 x double> %__Y, i64 0
  %2 = fsub double -0.000000e+00, %.rhs7.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext2.i = extractelement <2 x double> %__Y, i32 0
  %cond.i = select i1 %tobool.i, double %vecext2.i, double %3
  %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X64-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__X
  %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %__Y, i8 %__U, i32 4)
  ret <2 x double> %0
}
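; The remaining tests exercise masked scalar add/sub/mul/div (presumably the
; IR clang emits for the _mm_mask_*_ss/_sd and _mm_maskz_*_ss/_sd builtins).
; The add/sub/mul tests check bit 0 of %__U with and+icmp and select between
; the operation's result and the passthru or zero; the whole pattern should
; fold into one masked vaddss/vsubss/vmulss (or the *sd equivalent).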
define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_add_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_add_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %add.i.i = fadd float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %add.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_add_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_add_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %add.i.i = fadd float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %add.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_add_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_add_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %add.i.i = fadd double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %add.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_add_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_add_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %add.i.i = fadd double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %add.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_sub_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_sub_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %sub.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_sub_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_sub_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %sub.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_sub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_sub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %sub.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_sub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_sub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %sub.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_mul_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_mul_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %mul.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_mul_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_mul_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %mul.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_mul_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_mul_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %mul.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_mul_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_mul_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %mul.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
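; Note that the div tests below use a different mask idiom than add/sub/mul
; above: %__U is bitcast to <8 x i1> and bit 0 is extracted directly, instead
; of the and+icmp+select pattern. Both forms should lower to the same masked
; vdivss/vdivsd.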
define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_div_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_div_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %1 = extractelement <4 x float> %__B, i64 0
  %2 = extractelement <4 x float> %__W, i64 0
  %3 = fdiv float %0, %1
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = extractelement <8 x i1> %4, i64 0
  %6 = select i1 %5, float %3, float %2
  %7 = insertelement <4 x float> %__A, float %6, i64 0
  ret <4 x float> %7
}

define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_div_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_div_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %1 = extractelement <4 x float> %__B, i64 0
  %2 = fdiv float %0, %1
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = extractelement <8 x i1> %3, i64 0
  %5 = select i1 %4, float %2, float 0.000000e+00
  %6 = insertelement <4 x float> %__A, float %5, i64 0
  ret <4 x float> %6
}

define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_div_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_div_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %1 = extractelement <2 x double> %__B, i64 0
  %2 = extractelement <2 x double> %__W, i64 0
  %3 = fdiv double %0, %1
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = extractelement <8 x i1> %4, i64 0
  %6 = select i1 %5, double %3, double %2
  %7 = insertelement <2 x double> %__A, double %6, i64 0
  ret <2 x double> %7
}

define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_div_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_div_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %1 = extractelement <2 x double> %__B, i64 0
  %2 = fdiv double %0, %1
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = extractelement <8 x i1> %3, i64 0
  %5 = select i1 %4, double %2, double 0.000000e+00
  %6 = insertelement <2 x double> %__A, double %5, i64 0
  ret <2 x double> %6
}

declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) #9
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) #9
declare float @llvm.fma.f32(float, float, float) #9
declare double @llvm.fma.f64(double, double, double) #9