; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
;
; Codegen tests for two-source <N x float> shufflevector patterns that are
; expected to select to vshufps at 128-, 256- and 512-bit vector widths.
; Each shuffle mask appears in up to three forms:
;   *_shuff_*             - plain shuffle.
;   *_masked_shuff_*      - lanes where %mask == 0.0 (fcmp oeq) take the shuffle
;                           result, the other lanes keep %vec3 (merge masking,
;                           shown as {%k1} in the expected output).
;   *_zero_masked_shuff_* - lanes where %mask == 0.0 take the shuffle result,
;                           the other lanes are zeroed ({%k1} {z}).
; The *_mem_* variants load the second shuffle source through a pointer; the
; expected output folds that load into the instruction (mem[...] operand).

; 128-bit (<4 x float>), second source in a register.

define <4 x float> @test_4xfloat_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2) {
; CHECK-LABEL: test_4xfloat_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,2],xmm1[3,2]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 2, i32 7, i32 6>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2],xmm1[3,2]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 2, i32 7, i32 6>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,3],xmm1[2,1]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm1[2,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2) {
; CHECK-LABEL: test_4xfloat_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; 128-bit (<4 x float>), second source loaded from memory.

define <4 x float> @test_4xfloat_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
; CHECK-LABEL: test_4xfloat_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,0],mem[1,2]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 0, i32 5, i32 6>
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,0],mem[1,2]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 0, i32 5, i32 6>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0],mem[1,2]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 0, i32 5, i32 6>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,3],mem[1,3]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 5, i32 7>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],mem[1,3]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 5, i32 7>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],mem[2,0]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 4>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],mem[2,0]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 4>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
; CHECK-LABEL: test_4xfloat_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,1],mem[3,2]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 6>
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[2,1],mem[3,2]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 6>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],mem[3,2]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 6>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; 256-bit (<8 x float>), second source in a register.

define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 3, i32 11, i32 9, i32 4, i32 7, i32 15, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 3, i32 11, i32 9, i32 4, i32 7, i32 15, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 2, i32 10, i32 10, i32 4, i32 6, i32 14, i32 14>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 2, i32 10, i32 10, i32 4, i32 6, i32 14, i32 14>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; 256-bit (<8 x float>), second source loaded from memory.

define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 1, i32 8, i32 8, i32 6, i32 5, i32 12, i32 12>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 1, i32 8, i32 8, i32 6, i32 5, i32 12, i32 12>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 1, i32 8, i32 8, i32 6, i32 5, i32 12, i32 12>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 2, i32 9, i32 8, i32 6, i32 6, i32 13, i32 12>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 2, i32 9, i32 8, i32 6, i32 6, i32 13, i32 12>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 11, i32 11, i32 7, i32 7, i32 15, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 11, i32 11, i32 7, i32 7, i32 15, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 10, i32 9, i32 7, i32 7, i32 14, i32 13>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 10, i32 9, i32 7, i32 7, i32 14, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 10, i32 9, i32 7, i32 7, i32 14, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; 512-bit (<16 x float>), second source in a register.

define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2) {
; CHECK-LABEL: test_16xfloat_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshufps {{.*#+}} zmm2 {%k1} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 1, i32 2, i32 19, i32 19, i32 5, i32 6, i32 23, i32 23, i32 9, i32 10, i32 27, i32 27, i32 13, i32 14, i32 31, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15] 566; CHECK-NEXT: retq 567 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 1, i32 2, i32 19, i32 19, i32 5, i32 6, i32 23, i32 23, i32 9, i32 10, i32 27, i32 27, i32 13, i32 14, i32 31, i32 31> 568 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 569 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 570 ret <16 x float> %res 571} 572define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { 573; CHECK-LABEL: test_16xfloat_masked_shuff_mask2: 574; CHECK: # %bb.0: 575; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 576; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 577; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13] 578; CHECK-NEXT: vmovaps %zmm2, %zmm0 579; CHECK-NEXT: retq 580 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 18, i32 17, i32 7, i32 4, i32 22, i32 21, i32 11, i32 8, i32 26, i32 25, i32 15, i32 12, i32 30, i32 29> 581 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 582 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 583 ret <16 x float> %res 584} 585 586define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { 587; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2: 588; CHECK: # %bb.0: 589; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 590; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 591; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13] 592; CHECK-NEXT: retq 593 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 18, i32 17, i32 7, i32 4, i32 22, i32 21, i32 11, i32 8, i32 26, i32 25, i32 15, i32 12, i32 30, i32 29> 594 %cmp = 
fcmp oeq <16 x float> %mask, zeroinitializer 595 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 596 ret <16 x float> %res 597} 598define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) { 599; CHECK-LABEL: test_16xfloat_shuff_mask3: 600; CHECK: # %bb.0: 601; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] 602; CHECK-NEXT: retq 603 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30> 604 ret <16 x float> %res 605} 606define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { 607; CHECK-LABEL: test_16xfloat_masked_shuff_mask3: 608; CHECK: # %bb.0: 609; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 610; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 611; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] 612; CHECK-NEXT: vmovaps %zmm2, %zmm0 613; CHECK-NEXT: retq 614 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30> 615 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 616 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 617 ret <16 x float> %res 618} 619 620define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { 621; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3: 622; CHECK: # %bb.0: 623; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 624; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 625; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] 626; 
CHECK-NEXT: retq 627 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30> 628 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 629 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 630 ret <16 x float> %res 631} 632define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 633; CHECK-LABEL: test_16xfloat_shuff_mem_mask0: 634; CHECK: # %bb.0: 635; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] 636; CHECK-NEXT: retq 637 %vec2 = load <16 x float>, <16 x float>* %vec2p 638 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 16, i32 18, i32 7, i32 4, i32 20, i32 22, i32 11, i32 8, i32 24, i32 26, i32 15, i32 12, i32 28, i32 30> 639 ret <16 x float> %res 640} 641define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 642; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0: 643; CHECK: # %bb.0: 644; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 645; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 646; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] 647; CHECK-NEXT: vmovaps %zmm1, %zmm0 648; CHECK-NEXT: retq 649 %vec2 = load <16 x float>, <16 x float>* %vec2p 650 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 16, i32 18, i32 7, i32 4, i32 20, i32 22, i32 11, i32 8, i32 24, i32 26, i32 15, i32 12, i32 28, i32 30> 651 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 652 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 653 ret <16 x float> %res 654} 655 656define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x 
float>* %vec2p, <16 x float> %mask) { 657; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: 658; CHECK: # %bb.0: 659; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 660; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 661; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] 662; CHECK-NEXT: retq 663 %vec2 = load <16 x float>, <16 x float>* %vec2p 664 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 16, i32 18, i32 7, i32 4, i32 20, i32 22, i32 11, i32 8, i32 24, i32 26, i32 15, i32 12, i32 28, i32 30> 665 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 666 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 667 ret <16 x float> %res 668} 669 670define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 671; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1: 672; CHECK: # %bb.0: 673; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 674; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 675; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14] 676; CHECK-NEXT: vmovaps %zmm1, %zmm0 677; CHECK-NEXT: retq 678 %vec2 = load <16 x float>, <16 x float>* %vec2p 679 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 2, i32 19, i32 18, i32 4, i32 6, i32 23, i32 22, i32 8, i32 10, i32 27, i32 26, i32 12, i32 14, i32 31, i32 30> 680 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 681 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 682 ret <16 x float> %res 683} 684 685define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 686; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: 687; CHECK: # %bb.0: 688; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 689; CHECK-NEXT: vcmpeqps %zmm2, 
%zmm1, %k1 690; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14] 691; CHECK-NEXT: retq 692 %vec2 = load <16 x float>, <16 x float>* %vec2p 693 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 2, i32 19, i32 18, i32 4, i32 6, i32 23, i32 22, i32 8, i32 10, i32 27, i32 26, i32 12, i32 14, i32 31, i32 30> 694 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 695 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 696 ret <16 x float> %res 697} 698 699define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 700; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2: 701; CHECK: # %bb.0: 702; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 703; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 704; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14] 705; CHECK-NEXT: vmovaps %zmm1, %zmm0 706; CHECK-NEXT: retq 707 %vec2 = load <16 x float>, <16 x float>* %vec2p 708 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 0, i32 18, i32 18, i32 6, i32 4, i32 22, i32 22, i32 10, i32 8, i32 26, i32 26, i32 14, i32 12, i32 30, i32 30> 709 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 710 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 711 ret <16 x float> %res 712} 713 714define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 715; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: 716; CHECK: # %bb.0: 717; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 718; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 719; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14] 720; CHECK-NEXT: retq 721 %vec2 = load <16 x float>, 
<16 x float>* %vec2p 722 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 0, i32 18, i32 18, i32 6, i32 4, i32 22, i32 22, i32 10, i32 8, i32 26, i32 26, i32 14, i32 12, i32 30, i32 30> 723 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 724 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 725 ret <16 x float> %res 726} 727 728define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 729; CHECK-LABEL: test_16xfloat_shuff_mem_mask3: 730; CHECK: # %bb.0: 731; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] 732; CHECK-NEXT: retq 733 %vec2 = load <16 x float>, <16 x float>* %vec2p 734 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 1, i32 17, i32 19, i32 6, i32 5, i32 21, i32 23, i32 10, i32 9, i32 25, i32 27, i32 14, i32 13, i32 29, i32 31> 735 ret <16 x float> %res 736} 737define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 738; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3: 739; CHECK: # %bb.0: 740; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 741; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 742; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] 743; CHECK-NEXT: vmovaps %zmm1, %zmm0 744; CHECK-NEXT: retq 745 %vec2 = load <16 x float>, <16 x float>* %vec2p 746 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 1, i32 17, i32 19, i32 6, i32 5, i32 21, i32 23, i32 10, i32 9, i32 25, i32 27, i32 14, i32 13, i32 29, i32 31> 747 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 748 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 749 ret <16 x float> %res 750} 751 752define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x 
float>* %vec2p, <16 x float> %mask) { 753; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: 754; CHECK: # %bb.0: 755; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 756; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 757; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] 758; CHECK-NEXT: retq 759 %vec2 = load <16 x float>, <16 x float>* %vec2p 760 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 1, i32 17, i32 19, i32 6, i32 5, i32 21, i32 23, i32 10, i32 9, i32 25, i32 27, i32 14, i32 13, i32 29, i32 31> 761 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 762 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 763 ret <16 x float> %res 764} 765 766define <2 x double> @test_2xdouble_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2) { 767; CHECK-LABEL: test_2xdouble_shuff_mask0: 768; CHECK: # %bb.0: 769; CHECK-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 770; CHECK-NEXT: retq 771 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 772 ret <2 x double> %res 773} 774define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { 775; CHECK-LABEL: test_2xdouble_masked_shuff_mask0: 776; CHECK: # %bb.0: 777; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 778; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 779; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] 780; CHECK-NEXT: vmovapd %xmm2, %xmm0 781; CHECK-NEXT: retq 782 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 783 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 784 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 785 ret <2 x double> %res 786} 787 788define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { 789; CHECK-LABEL: 
test_2xdouble_zero_masked_shuff_mask0: 790; CHECK: # %bb.0: 791; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 792; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 793; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] 794; CHECK-NEXT: retq 795 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 796 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 797 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 798 ret <2 x double> %res 799} 800define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { 801; CHECK-LABEL: test_2xdouble_masked_shuff_mask1: 802; CHECK: # %bb.0: 803; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 804; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 805; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] 806; CHECK-NEXT: vmovapd %xmm2, %xmm0 807; CHECK-NEXT: retq 808 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 809 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 810 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 811 ret <2 x double> %res 812} 813 814define <2 x double> @test_2xdouble_zero_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { 815; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask1: 816; CHECK: # %bb.0: 817; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 818; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 819; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] 820; CHECK-NEXT: retq 821 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 822 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 823 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 824 ret <2 x double> %res 825} 826define <2 x double> @test_2xdouble_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 827; CHECK-LABEL: test_2xdouble_shuff_mem_mask0: 828; CHECK: # %bb.0: 829; CHECK-NEXT: 
vshufpd {{.*#+}} xmm0 = xmm0[1],mem[0] 830; CHECK-NEXT: retq 831 %vec2 = load <2 x double>, <2 x double>* %vec2p 832 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 833 ret <2 x double> %res 834} 835define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { 836; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask0: 837; CHECK: # %bb.0: 838; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 839; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 840; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0] 841; CHECK-NEXT: vmovapd %xmm1, %xmm0 842; CHECK-NEXT: retq 843 %vec2 = load <2 x double>, <2 x double>* %vec2p 844 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 845 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 846 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 847 ret <2 x double> %res 848} 849 850define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { 851; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask0: 852; CHECK: # %bb.0: 853; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 854; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 855; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0] 856; CHECK-NEXT: retq 857 %vec2 = load <2 x double>, <2 x double>* %vec2p 858 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 859 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 860 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 861 ret <2 x double> %res 862} 863 864define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { 865; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask1: 866; CHECK: # %bb.0: 867; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 868; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 869; 
CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0] 870; CHECK-NEXT: vmovapd %xmm1, %xmm0 871; CHECK-NEXT: retq 872 %vec2 = load <2 x double>, <2 x double>* %vec2p 873 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 874 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 875 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 876 ret <2 x double> %res 877} 878 879define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { 880; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask1: 881; CHECK: # %bb.0: 882; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 883; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 884; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0] 885; CHECK-NEXT: retq 886 %vec2 = load <2 x double>, <2 x double>* %vec2p 887 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> 888 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 889 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 890 ret <2 x double> %res 891} 892 893define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) { 894; CHECK-LABEL: test_4xdouble_shuff_mask0: 895; CHECK: # %bb.0: 896; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] 897; CHECK-NEXT: retq 898 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7> 899 ret <4 x double> %res 900} 901define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 902; CHECK-LABEL: test_4xdouble_masked_shuff_mask0: 903; CHECK: # %bb.0: 904; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 905; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 906; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[3] 907; CHECK-NEXT: vmovapd %ymm2, %ymm0 908; CHECK-NEXT: retq 909 %shuf = shufflevector <4 x double> %vec1, 
<4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7> 910 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 911 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 912 ret <4 x double> %res 913} 914 915define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 916; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0: 917; CHECK: # %bb.0: 918; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 919; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 920; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[3] 921; CHECK-NEXT: retq 922 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7> 923 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 924 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 925 ret <4 x double> %res 926} 927define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 928; CHECK-LABEL: test_4xdouble_masked_shuff_mask1: 929; CHECK: # %bb.0: 930; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 931; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 932; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[2] 933; CHECK-NEXT: vmovapd %ymm2, %ymm0 934; CHECK-NEXT: retq 935 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6> 936 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 937 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 938 ret <4 x double> %res 939} 940 941define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 942; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1: 943; CHECK: # %bb.0: 944; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 945; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 946; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[2] 947; CHECK-NEXT: retq 948 
%shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6> 949 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 950 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 951 ret <4 x double> %res 952} 953define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 954; CHECK-LABEL: test_4xdouble_masked_shuff_mask2: 955; CHECK: # %bb.0: 956; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 957; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 958; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[0],ymm0[3],ymm1[2] 959; CHECK-NEXT: vmovapd %ymm2, %ymm0 960; CHECK-NEXT: retq 961 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 4, i32 3, i32 6> 962 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 963 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 964 ret <4 x double> %res 965} 966 967define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 968; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2: 969; CHECK: # %bb.0: 970; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 971; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 972; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[0],ymm0[3],ymm1[2] 973; CHECK-NEXT: retq 974 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 4, i32 3, i32 6> 975 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 976 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 977 ret <4 x double> %res 978} 979define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) { 980; CHECK-LABEL: test_4xdouble_shuff_mask3: 981; CHECK: # %bb.0: 982; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3] 983; CHECK-NEXT: retq 984 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 
7> 985 ret <4 x double> %res 986} 987define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 988; CHECK-LABEL: test_4xdouble_masked_shuff_mask3: 989; CHECK: # %bb.0: 990; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 991; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 992; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[3] 993; CHECK-NEXT: vmovapd %ymm2, %ymm0 994; CHECK-NEXT: retq 995 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7> 996 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 997 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 998 ret <4 x double> %res 999} 1000 1001define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 1002; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3: 1003; CHECK: # %bb.0: 1004; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1005; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 1006; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[3] 1007; CHECK-NEXT: retq 1008 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7> 1009 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 1010 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 1011 ret <4 x double> %res 1012} 1013define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 1014; CHECK-LABEL: test_4xdouble_shuff_mem_mask0: 1015; CHECK: # %bb.0: 1016; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[2] 1017; CHECK-NEXT: retq 1018 %vec2 = load <4 x double>, <4 x double>* %vec2p 1019 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 6> 1020 ret <4 x double> %res 1021} 1022define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> 
%vec3, <4 x double> %mask) { 1023; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0: 1024; CHECK: # %bb.0: 1025; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1026; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 1027; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[2] 1028; CHECK-NEXT: vmovapd %ymm1, %ymm0 1029; CHECK-NEXT: retq 1030 %vec2 = load <4 x double>, <4 x double>* %vec2p 1031 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 6> 1032 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 1033 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 1034 ret <4 x double> %res 1035} 1036 1037define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { 1038; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: 1039; CHECK: # %bb.0: 1040; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1041; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 1042; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[2] 1043; CHECK-NEXT: retq 1044 %vec2 = load <4 x double>, <4 x double>* %vec2p 1045 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 6> 1046 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 1047 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 1048 ret <4 x double> %res 1049} 1050 1051define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { 1052; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1: 1053; CHECK: # %bb.0: 1054; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1055; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 1056; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[1],ymm0[2],mem[2] 1057; CHECK-NEXT: vmovapd %ymm1, %ymm0 1058; CHECK-NEXT: retq 1059 %vec2 = load <4 x double>, <4 x double>* %vec2p 1060 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> 
<i32 0, i32 5, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; 4 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <0,5,2,6> shuffle, all other lanes are zero.
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[1],ymm0[2],mem[2]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %permuted = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 0, i32 5, i32 2, i32 6>
  %lanes = fcmp oeq <4 x double> %mask, zeroinitializer
  %out = select <4 x i1> %lanes, <4 x double> %permuted, <4 x double> zeroinitializer
  ret <4 x double> %out
}

; 4 x double, memory operand, merge-masking: lanes where %mask == 0.0 take the
; <0,4,3,6> shuffle, all other lanes keep %vec3.
define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[3],mem[2]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %permuted = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
  %lanes = fcmp oeq <4 x double> %mask, zeroinitializer
  %out = select <4 x i1> %lanes, <4 x double> %permuted, <4 x double> %vec3
  ret <4 x double> %out
}

; 4 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <0,4,3,6> shuffle, all other lanes are zero.
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[3],mem[2]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %permuted = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
  %lanes = fcmp oeq <4 x double> %mask, zeroinitializer
  %out = select <4 x i1> %lanes, <4 x double> %permuted, <4 x double> zeroinitializer
  ret <4 x double> %out
}

; 4 x double, memory operand, unmasked: plain <1,5,2,6> shuffle.
define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %out = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
  ret <4 x double> %out
}

; 4 x double, memory operand, merge-masking: lanes where %mask == 0.0 take the
; <1,5,2,6> shuffle, all other lanes keep %vec3.
define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %permuted = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
  %lanes = fcmp oeq <4 x double> %mask, zeroinitializer
  %out = select <4 x i1> %lanes, <4 x double> %permuted, <4 x double> %vec3
  ret <4 x double> %out
}

; 4 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <1,5,2,6> shuffle, all other lanes are zero.
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %permuted = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
  %lanes = fcmp oeq <4 x double> %mask, zeroinitializer
  %out = select <4 x i1> %lanes, <4 x double> %permuted, <4 x double> zeroinitializer
  ret <4 x double> %out
}

; 8 x double, register operands, unmasked: plain <0,8,2,11,4,13,7,15> shuffle.
define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %out = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
  ret <8 x double> %out
}

; 8 x double, register operands, merge-masking: lanes where %mask == 0.0 take
; the <0,8,2,11,4,13,7,15> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, register operands, zero-masking: lanes where %mask == 0.0 take
; the <0,8,2,11,4,13,7,15> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, register operands, merge-masking: lanes where %mask == 0.0 take
; the <0,8,2,11,5,13,6,15> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 5, i32 13, i32 6, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, register operands, zero-masking: lanes where %mask == 0.0 take
; the <0,8,2,11,5,13,6,15> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 5, i32 13, i32 6, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, register operands, merge-masking: lanes where %mask == 0.0 take
; the <1,8,3,11,4,13,6,14> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 13, i32 6, i32 14>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, register operands, zero-masking: lanes where %mask == 0.0 take
; the <1,8,3,11,4,13,6,14> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 13, i32 6, i32 14>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, register operands, unmasked: plain <1,8,3,11,4,12,7,15> shuffle.
define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %out = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
  ret <8 x double> %out
}

; 8 x double, register operands, merge-masking: lanes where %mask == 0.0 take
; the <1,8,3,11,4,12,7,15> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, register operands, zero-masking: lanes where %mask == 0.0 take
; the <1,8,3,11,4,12,7,15> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, memory operand, unmasked: plain <0,8,2,10,5,13,6,15> shuffle.
define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %out = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 5, i32 13, i32 6, i32 15>
  ret <8 x double> %out
}

; 8 x double, memory operand, merge-masking: lanes where %mask == 0.0 take the
; <0,8,2,10,5,13,6,15> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 5, i32 13, i32 6, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <0,8,2,10,5,13,6,15> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 5, i32 13, i32 6, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, memory operand, merge-masking: lanes where %mask == 0.0 take the
; <1,8,3,10,4,12,7,15> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 4, i32 12, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <1,8,3,10,4,12,7,15> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 4, i32 12, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, memory operand, merge-masking: lanes where %mask == 0.0 take the
; <1,9,3,10,5,13,7,15> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 10, i32 5, i32 13, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <1,9,3,10,5,13,7,15> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 10, i32 5, i32 13, i32 7, i32 15>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}

; 8 x double, memory operand, unmasked: plain <1,9,2,11,4,13,6,14> shuffle.
define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %out = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 14>
  ret <8 x double> %out
}

; 8 x double, memory operand, merge-masking: lanes where %mask == 0.0 take the
; <1,9,2,11,4,13,6,14> shuffle, all other lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 14>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> %vec3
  ret <8 x double> %out
}

; 8 x double, memory operand, zero-masking: lanes where %mask == 0.0 take the
; <1,9,2,11,4,13,6,14> shuffle, all other lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %permuted = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 14>
  %lanes = fcmp oeq <8 x double> %mask, zeroinitializer
  %out = select <8 x i1> %lanes, <8 x double> %permuted, <8 x double> zeroinitializer
  ret <8 x double> %out
}