; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; 512-bit single precision, (a0 * a1) + a2.
; The separate multiply and add are expected to contract into one vfmadd213ps.
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL: ## BB#0:
; ALL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <16 x float> %a0, %a1
  %sum = fadd <16 x float> %prod, %a2
  ret <16 x float> %sum
}

; 512-bit single precision, (a0 * a1) - a2.
; Expected to contract into one vfmsub213ps.
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmsub_ps_z:
; ALL: ## BB#0:
; ALL-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <16 x float> %a0, %a1
  %diff = fsub <16 x float> %prod, %a2
  ret <16 x float> %diff
}

; 512-bit single precision, a2 - (a0 * a1).
; Expected to contract into one vfnmadd213ps.
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmadd_ps_z:
; ALL: ## BB#0:
; ALL-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <16 x float> %a0, %a1
  %diff = fsub <16 x float> %a2, %prod
  ret <16 x float> %diff
}

; 512-bit single precision, -(a0 * a1) - a2.
; The negation is spelled as a subtraction from a splat of -0.0; the whole
; expression is expected to contract into one vfnmsub213ps.
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmsub_ps_z:
; ALL: ## BB#0:
; ALL-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <16 x float> %a0, %a1
  %neg = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %prod
  %diff = fsub <16 x float> %neg, %a2
  ret <16 x float> %diff
}

; 512-bit double precision, (a0 * a1) + a2.
; Expected to contract into one vfmadd213pd.
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmadd_pd_z:
; ALL: ## BB#0:
; ALL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <8 x double> %a0, %a1
  %sum = fadd <8 x double> %prod, %a2
  ret <8 x double> %sum
}

; 512-bit double precision, (a0 * a1) - a2.
; Expected to contract into one vfmsub213pd.
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmsub_pd_z:
; ALL: ## BB#0:
; ALL-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <8 x double> %a0, %a1
  %diff = fsub <8 x double> %prod, %a2
  ret <8 x double> %diff
}

; Scalar double, (a0 * a1) - a2 with every operand in a register.
; The expected output uses the 213 form and then copies the result into the
; return register.
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL-LABEL: test_x86_fmsub_213:
; ALL: ## BB#0:
; ALL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
; ALL-NEXT: vmovaps %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul double %a0, %a1
  %diff = fsub double %prod, %a2
  ret double %diff
}

; Scalar double, (a0 * a1) - [a2_ptr].
; The subtrahend comes from memory; the expected output folds the load into
; the 213 form as its memory operand.
define double @test_x86_fmsub_213_m(double %a0, double %a1, double* %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL: ## BB#0:
; ALL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
; ALL-NEXT: vmovaps %zmm1, %zmm0
; ALL-NEXT: retq
  %mem = load double, double* %a2_ptr
  %prod = fmul double %a0, %a1
  %diff = fsub double %prod, %mem
  ret double %diff
}

; Scalar double, (a0 * [a2_ptr]) - a1.
; Here the loaded value is a multiplicand; the expected output folds the load
; into the 231 form as its memory operand.
define double @test_x86_fmsub_231_m(double %a0, double %a1, double* %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_231_m:
; ALL: ## BB#0:
; ALL-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; ALL-NEXT: vmovaps %zmm1, %zmm0
; ALL-NEXT: retq
  %mem = load double, double* %a2_ptr
  %prod = fmul double %a0, %mem
  %diff = fsub double %prod, %a1
  ret double %diff
}

; (a1 * splat constant) + a2.
; The constant is a multiplicand; the expected output is the 231 form with a
; {1to16} embedded-broadcast memory operand.
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test231_br:
; ALL: ## BB#0:
; ALL-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
; ALL-NEXT: vmovaps %zmm1, %zmm0
; ALL-NEXT: retq
  %scaled = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                    float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                    float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                    float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %acc = fadd <16 x float> %scaled, %a2
  ret <16 x float> %acc
}

; (a1 * a2) + splat constant.
; The constant is the addend; the expected output is the 213 form with a
; {1to16} embedded-broadcast memory operand and no extra register copy.
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test213_br:
; ALL: ## BB#0:
; ALL-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; ALL-NEXT: retq
  %prod = fmul <16 x float> %a1, %a2
  %acc = fadd <16 x float> %prod, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                   float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                   float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                   float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %acc
}

; Masked form, select(mask, a0 * [mem] + a1, a0).
; Lanes with a clear mask bit keep a0, which is also a multiplicand, so the
; expected instruction is a merge-masked 132 form writing over a0's register.
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd132_ps:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a0, %mem
  %fma = fadd <16 x float> %prod, %a1
  %sel = select <16 x i1> %mask, <16 x float> %fma, <16 x float> %a0
  ret <16 x float> %sel
}

; Masked form, select(mask, a0 * [mem] + a1, a1).
; Lanes with a clear mask bit keep a1, which is the addend, so the expected
; instruction is a merge-masked 231 form writing over a1's register.
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd231_ps:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a0, %mem
  %fma = fadd <16 x float> %prod, %a1
  %sel = select <16 x i1> %mask, <16 x float> %fma, <16 x float> %a1
  ret <16 x float> %sel
}

; Masked form, select(mask, a1 * a0 + [mem], a1).
; Lanes with a clear mask bit keep a1, which is a multiplicand, and the loaded
; value is the addend, so the expected instruction is a merge-masked 213 form
; writing over a1's register.
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd213_ps:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a1, %a0
  %fma = fadd <16 x float> %prod, %mem
  %sel = select <16 x i1> %mask, <16 x float> %fma, <16 x float> %a1
  ret <16 x float> %sel
}