; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s

; Scalar floating-point broadcast tests.
;
; Every function inserts a scalar into lane 0 of a 2-element vector and then
; splats lane 0 across a wider vector with an all-zero shufflevector mask.
;   * test_<ty>_to_<N>            - plain broadcast.
;   * test_masked_<ty>_to_<N>_*   - lanes where %mask compares ordered-equal to
;                                   0.0 take the broadcast value, the remaining
;                                   lanes keep %default.
;   * test_masked_z_<ty>_to_<N>_* - same predicate, but the remaining lanes are
;                                   zeroinitializer.
;   * *_mem_*                     - the scalar is loaded from %p first.
; NOTE(review): within each family the _mask0 .. _mask3 variants shown here have
; identical IR bodies and identical expected assembly.

; --- double -> <4 x double>, scalar in register ---

define <4 x double> @test_double_to_4(double %s) {
; CHECK-LABEL: test_double_to_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; --- double -> <8 x double>, scalar in register ---

define <8 x double> @test_double_to_8(double %s) {
; CHECK-LABEL: test_double_to_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %res
}

define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; --- float -> <4 x float>, scalar in register ---

define <4 x float> @test_float_to_4(float %s) {
; CHECK-LABEL: test_float_to_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %res
}

define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
  ret <4 x float> %res
}

define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
  ret <4 x float> %res
}

define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
  ret <4 x float> %res
}

define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
  ret <4 x float> %res
}

define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; --- float -> <8 x float>, scalar in register ---

define <8 x float> @test_float_to_8(float %s) {
; CHECK-LABEL: test_float_to_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %res
}

define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; --- float -> <16 x float>, scalar in register ---

define <16 x float> @test_float_to_16(float %s) {
; CHECK-LABEL: test_float_to_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x float> %res
}

define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x float> undef, float %s, i32 0
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; --- double -> <4 x double>, scalar loaded from memory ---

define <4 x double> @test_double_to_4_mem(double* %p) {
; CHECK-LABEL: test_double_to_4_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; --- double -> <8 x double>, scalar loaded from memory ---

define <8 x double> @test_double_to_8_mem(double* %p) {
; CHECK-LABEL: test_double_to_8_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %res
}

define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x double> undef, double %s, i32 0
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %s = load double, double* %p
  %vec = insertelement <2 x
double> undef, double %s, i32 0 769 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 770 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 771 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 772 ret <8 x double> %res 773} 774define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> %default, <8 x double> %mask) { 775; CHECK-LABEL: test_masked_double_to_8_mem_mask1: 776; CHECK: # %bb.0: 777; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 778; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 779; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} 780; CHECK-NEXT: retq 781 %s = load double, double* %p 782 %vec = insertelement <2 x double> undef, double %s, i32 0 783 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 784 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 785 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default 786 ret <8 x double> %res 787} 788 789define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double> %mask) { 790; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1: 791; CHECK: # %bb.0: 792; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 793; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 794; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} 795; CHECK-NEXT: retq 796 %s = load double, double* %p 797 %vec = insertelement <2 x double> undef, double %s, i32 0 798 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 799 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 800 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 801 ret <8 x double> %res 802} 803define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> %default, <8 x double> %mask) { 804; CHECK-LABEL: test_masked_double_to_8_mem_mask2: 805; CHECK: # %bb.0: 806; 
CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 807; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 808; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} 809; CHECK-NEXT: retq 810 %s = load double, double* %p 811 %vec = insertelement <2 x double> undef, double %s, i32 0 812 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 813 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 814 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default 815 ret <8 x double> %res 816} 817 818define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double> %mask) { 819; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2: 820; CHECK: # %bb.0: 821; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 822; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 823; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} 824; CHECK-NEXT: retq 825 %s = load double, double* %p 826 %vec = insertelement <2 x double> undef, double %s, i32 0 827 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 828 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 829 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 830 ret <8 x double> %res 831} 832define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> %default, <8 x double> %mask) { 833; CHECK-LABEL: test_masked_double_to_8_mem_mask3: 834; CHECK: # %bb.0: 835; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 836; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 837; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} 838; CHECK-NEXT: retq 839 %s = load double, double* %p 840 %vec = insertelement <2 x double> undef, double %s, i32 0 841 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 842 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 843 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default 844 ret <8 x 
double> %res 845} 846 847define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p, <8 x double> %mask) { 848; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3: 849; CHECK: # %bb.0: 850; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 851; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 852; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} 853; CHECK-NEXT: retq 854 %s = load double, double* %p 855 %vec = insertelement <2 x double> undef, double %s, i32 0 856 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 857 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 858 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 859 ret <8 x double> %res 860} 861define <4 x float> @test_float_to_4_mem(float* %p) { 862; CHECK-LABEL: test_float_to_4_mem: 863; CHECK: # %bb.0: 864; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 865; CHECK-NEXT: retq 866 %s = load float, float* %p 867 %vec = insertelement <2 x float> undef, float %s, i32 0 868 %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 869 ret <4 x float> %res 870} 871define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %default, <4 x float> %mask) { 872; CHECK-LABEL: test_masked_float_to_4_mem_mask0: 873; CHECK: # %bb.0: 874; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 875; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 876; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} 877; CHECK-NEXT: retq 878 %s = load float, float* %p 879 %vec = insertelement <2 x float> undef, float %s, i32 0 880 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 881 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 882 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default 883 ret <4 x float> %res 884} 885 886define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %mask) { 887; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0: 
888; CHECK: # %bb.0: 889; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 890; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 891; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} 892; CHECK-NEXT: retq 893 %s = load float, float* %p 894 %vec = insertelement <2 x float> undef, float %s, i32 0 895 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 896 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 897 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 898 ret <4 x float> %res 899} 900define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %default, <4 x float> %mask) { 901; CHECK-LABEL: test_masked_float_to_4_mem_mask1: 902; CHECK: # %bb.0: 903; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 904; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 905; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} 906; CHECK-NEXT: retq 907 %s = load float, float* %p 908 %vec = insertelement <2 x float> undef, float %s, i32 0 909 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 910 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 911 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default 912 ret <4 x float> %res 913} 914 915define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %mask) { 916; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1: 917; CHECK: # %bb.0: 918; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 919; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 920; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} 921; CHECK-NEXT: retq 922 %s = load float, float* %p 923 %vec = insertelement <2 x float> undef, float %s, i32 0 924 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 925 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 926 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 927 ret <4 x float> %res 928} 929define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x 
float> %default, <4 x float> %mask) { 930; CHECK-LABEL: test_masked_float_to_4_mem_mask2: 931; CHECK: # %bb.0: 932; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 933; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 934; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} 935; CHECK-NEXT: retq 936 %s = load float, float* %p 937 %vec = insertelement <2 x float> undef, float %s, i32 0 938 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 939 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 940 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default 941 ret <4 x float> %res 942} 943 944define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %mask) { 945; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2: 946; CHECK: # %bb.0: 947; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 948; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 949; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} 950; CHECK-NEXT: retq 951 %s = load float, float* %p 952 %vec = insertelement <2 x float> undef, float %s, i32 0 953 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 954 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 955 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 956 ret <4 x float> %res 957} 958define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %default, <4 x float> %mask) { 959; CHECK-LABEL: test_masked_float_to_4_mem_mask3: 960; CHECK: # %bb.0: 961; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 962; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 963; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} 964; CHECK-NEXT: retq 965 %s = load float, float* %p 966 %vec = insertelement <2 x float> undef, float %s, i32 0 967 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 968 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 969 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default 970 ret <4 x float> 
%res 971} 972 973define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p, <4 x float> %mask) { 974; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3: 975; CHECK: # %bb.0: 976; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 977; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 978; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} 979; CHECK-NEXT: retq 980 %s = load float, float* %p 981 %vec = insertelement <2 x float> undef, float %s, i32 0 982 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 983 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 984 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 985 ret <4 x float> %res 986} 987define <8 x float> @test_float_to_8_mem(float* %p) { 988; CHECK-LABEL: test_float_to_8_mem: 989; CHECK: # %bb.0: 990; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 991; CHECK-NEXT: retq 992 %s = load float, float* %p 993 %vec = insertelement <2 x float> undef, float %s, i32 0 994 %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 995 ret <8 x float> %res 996} 997define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %default, <8 x float> %mask) { 998; CHECK-LABEL: test_masked_float_to_8_mem_mask0: 999; CHECK: # %bb.0: 1000; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1001; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1002; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} 1003; CHECK-NEXT: retq 1004 %s = load float, float* %p 1005 %vec = insertelement <2 x float> undef, float %s, i32 0 1006 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1007 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1008 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 1009 ret <8 x float> %res 1010} 1011 1012define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %mask) { 1013; CHECK-LABEL: 
test_masked_z_float_to_8_mem_mask0: 1014; CHECK: # %bb.0: 1015; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1016; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 1017; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} 1018; CHECK-NEXT: retq 1019 %s = load float, float* %p 1020 %vec = insertelement <2 x float> undef, float %s, i32 0 1021 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1022 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1023 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1024 ret <8 x float> %res 1025} 1026define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %default, <8 x float> %mask) { 1027; CHECK-LABEL: test_masked_float_to_8_mem_mask1: 1028; CHECK: # %bb.0: 1029; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1030; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1031; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} 1032; CHECK-NEXT: retq 1033 %s = load float, float* %p 1034 %vec = insertelement <2 x float> undef, float %s, i32 0 1035 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1036 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1037 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 1038 ret <8 x float> %res 1039} 1040 1041define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x float> %mask) { 1042; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1: 1043; CHECK: # %bb.0: 1044; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1045; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 1046; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} 1047; CHECK-NEXT: retq 1048 %s = load float, float* %p 1049 %vec = insertelement <2 x float> undef, float %s, i32 0 1050 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1051 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1052 %res = select <8 x 
i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1053 ret <8 x float> %res 1054} 1055define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %default, <8 x float> %mask) { 1056; CHECK-LABEL: test_masked_float_to_8_mem_mask2: 1057; CHECK: # %bb.0: 1058; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1059; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1060; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} 1061; CHECK-NEXT: retq 1062 %s = load float, float* %p 1063 %vec = insertelement <2 x float> undef, float %s, i32 0 1064 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1065 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1066 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 1067 ret <8 x float> %res 1068} 1069 1070define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %mask) { 1071; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2: 1072; CHECK: # %bb.0: 1073; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1074; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 1075; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} 1076; CHECK-NEXT: retq 1077 %s = load float, float* %p 1078 %vec = insertelement <2 x float> undef, float %s, i32 0 1079 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1080 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1081 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1082 ret <8 x float> %res 1083} 1084define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %default, <8 x float> %mask) { 1085; CHECK-LABEL: test_masked_float_to_8_mem_mask3: 1086; CHECK: # %bb.0: 1087; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1088; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1089; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} 1090; CHECK-NEXT: retq 1091 %s = load float, float* %p 1092 %vec = insertelement <2 x float> undef, float %s, i32 
0 1093 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1094 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1095 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 1096 ret <8 x float> %res 1097} 1098 1099define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p, <8 x float> %mask) { 1100; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3: 1101; CHECK: # %bb.0: 1102; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1103; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 1104; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} 1105; CHECK-NEXT: retq 1106 %s = load float, float* %p 1107 %vec = insertelement <2 x float> undef, float %s, i32 0 1108 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1109 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1110 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1111 ret <8 x float> %res 1112} 1113define <16 x float> @test_float_to_16_mem(float* %p) { 1114; CHECK-LABEL: test_float_to_16_mem: 1115; CHECK: # %bb.0: 1116; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 1117; CHECK-NEXT: retq 1118 %s = load float, float* %p 1119 %vec = insertelement <2 x float> undef, float %s, i32 0 1120 %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1121 ret <16 x float> %res 1122} 1123define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %default, <16 x float> %mask) { 1124; CHECK-LABEL: test_masked_float_to_16_mem_mask0: 1125; CHECK: # %bb.0: 1126; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1127; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1128; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} 1129; CHECK-NEXT: retq 1130 %s = load float, float* %p 1131 %vec = insertelement <2 x float> undef, float %s, i32 0 1132 %shuf = 
shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1133 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1134 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1135 ret <16 x float> %res 1136} 1137 1138define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> %mask) { 1139; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0: 1140; CHECK: # %bb.0: 1141; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1142; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1143; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} 1144; CHECK-NEXT: retq 1145 %s = load float, float* %p 1146 %vec = insertelement <2 x float> undef, float %s, i32 0 1147 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1148 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1149 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1150 ret <16 x float> %res 1151} 1152define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %default, <16 x float> %mask) { 1153; CHECK-LABEL: test_masked_float_to_16_mem_mask1: 1154; CHECK: # %bb.0: 1155; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1156; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1157; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} 1158; CHECK-NEXT: retq 1159 %s = load float, float* %p 1160 %vec = insertelement <2 x float> undef, float %s, i32 0 1161 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1162 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1163 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1164 ret <16 x float> %res 1165} 1166 1167define <16 x float> 
@test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> %mask) { 1168; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1: 1169; CHECK: # %bb.0: 1170; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1171; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1172; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} 1173; CHECK-NEXT: retq 1174 %s = load float, float* %p 1175 %vec = insertelement <2 x float> undef, float %s, i32 0 1176 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1177 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1178 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1179 ret <16 x float> %res 1180} 1181define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %default, <16 x float> %mask) { 1182; CHECK-LABEL: test_masked_float_to_16_mem_mask2: 1183; CHECK: # %bb.0: 1184; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1185; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1186; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} 1187; CHECK-NEXT: retq 1188 %s = load float, float* %p 1189 %vec = insertelement <2 x float> undef, float %s, i32 0 1190 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1191 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1192 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1193 ret <16 x float> %res 1194} 1195 1196define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> %mask) { 1197; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2: 1198; CHECK: # %bb.0: 1199; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1200; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1201; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} 1202; CHECK-NEXT: retq 1203 %s = load float, float* %p 1204 %vec = insertelement <2 x float> undef, 
float %s, i32 0 1205 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1206 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1207 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1208 ret <16 x float> %res 1209} 1210define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %default, <16 x float> %mask) { 1211; CHECK-LABEL: test_masked_float_to_16_mem_mask3: 1212; CHECK: # %bb.0: 1213; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1214; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1215; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} 1216; CHECK-NEXT: retq 1217 %s = load float, float* %p 1218 %vec = insertelement <2 x float> undef, float %s, i32 0 1219 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1220 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1221 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1222 ret <16 x float> %res 1223} 1224 1225define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p, <16 x float> %mask) { 1226; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3: 1227; CHECK: # %bb.0: 1228; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1229; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1230; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} 1231; CHECK-NEXT: retq 1232 %s = load float, float* %p 1233 %vec = insertelement <2 x float> undef, float %s, i32 0 1234 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1235 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1236 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1237 ret <16 x float> %res 1238} 1239