; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s

; Codegen tests for the "unpack low" float shuffle (vunpcklps) with AVX-512F/VL.
; Each vector width is exercised in three flavours:
;   * plain            - shufflevector only
;   * masked           - shuffle result blended with %vec3 under an fcmp-oeq mask
;   * zero_masked      - shuffle result blended with zeroinitializer under the same mask
; and with the second operand either in a register or loaded from memory (*_mem_*).

; --- 128-bit (<4 x float>, xmm), register operands ---
define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) {
; CHECK-LABEL: test_4xfloat_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) {
; CHECK-LABEL: test_4xfloat_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}
; --- 128-bit (<4 x float>, xmm), second operand loaded from memory ---
define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %res
}
define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
  ret <4 x float> %res
}

define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %vec2 = load <4 x float>, <4 x float>* %vec2p
  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; --- 256-bit (<8 x float>, ymm), register operands ---
define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
; --- 256-bit (<8 x float>, ymm), second operand loaded from memory ---
define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %res
}
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; --- 512-bit (<16 x float>, zmm), register operands ---
define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) {
; CHECK-LABEL: test_16xfloat_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %res
}
define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}
define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}
define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}
define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) {
; CHECK-LABEL: test_16xfloat_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %res
}
define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13,
i32 29> 628 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 629 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 630 ret <16 x float> %res 631} 632define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 633; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask0: 634; CHECK: # %bb.0: 635; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 636; CHECK-NEXT: retq 637 %vec2 = load <16 x float>, <16 x float>* %vec2p 638 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 639 ret <16 x float> %res 640} 641define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 642; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: 643; CHECK: # %bb.0: 644; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 645; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 646; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 647; CHECK-NEXT: vmovaps %zmm1, %zmm0 648; CHECK-NEXT: retq 649 %vec2 = load <16 x float>, <16 x float>* %vec2p 650 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 651 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 652 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 653 ret <16 x float> %res 654} 655 656define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 657; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: 
658; CHECK: # %bb.0: 659; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 660; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 661; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 662; CHECK-NEXT: retq 663 %vec2 = load <16 x float>, <16 x float>* %vec2p 664 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 665 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 666 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 667 ret <16 x float> %res 668} 669 670define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 671; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: 672; CHECK: # %bb.0: 673; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 674; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 675; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 676; CHECK-NEXT: vmovaps %zmm1, %zmm0 677; CHECK-NEXT: retq 678 %vec2 = load <16 x float>, <16 x float>* %vec2p 679 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 680 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 681 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 682 ret <16 x float> %res 683} 684 685define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 686; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: 687; CHECK: # %bb.0: 688; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 689; CHECK-NEXT: vcmpeqps 
%zmm2, %zmm1, %k1 690; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 691; CHECK-NEXT: retq 692 %vec2 = load <16 x float>, <16 x float>* %vec2p 693 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 694 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 695 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 696 ret <16 x float> %res 697} 698 699define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 700; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: 701; CHECK: # %bb.0: 702; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 703; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 704; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 705; CHECK-NEXT: vmovaps %zmm1, %zmm0 706; CHECK-NEXT: retq 707 %vec2 = load <16 x float>, <16 x float>* %vec2p 708 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 709 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 710 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 711 ret <16 x float> %res 712} 713 714define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 715; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: 716; CHECK: # %bb.0: 717; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 718; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 719; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 720; CHECK-NEXT: retq 721 %vec2 = load <16 x float>, <16 x float>* %vec2p 722 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 723 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 724 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 725 ret <16 x float> %res 726} 727 728define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 729; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask3: 730; CHECK: # %bb.0: 731; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 732; CHECK-NEXT: retq 733 %vec2 = load <16 x float>, <16 x float>* %vec2p 734 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 735 ret <16 x float> %res 736} 737define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 738; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: 739; CHECK: # %bb.0: 740; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 741; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 742; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 743; CHECK-NEXT: vmovaps %zmm1, %zmm0 744; CHECK-NEXT: retq 745 %vec2 = load <16 x float>, <16 x float>* %vec2p 746 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 
12, i32 28, i32 13, i32 29> 747 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 748 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 749 ret <16 x float> %res 750} 751 752define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 753; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: 754; CHECK: # %bb.0: 755; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 756; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 757; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 758; CHECK-NEXT: retq 759 %vec2 = load <16 x float>, <16 x float>* %vec2p 760 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 761 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 762 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 763 ret <16 x float> %res 764} 765 766define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) { 767; CHECK-LABEL: test_2xdouble_unpack_low_mask0: 768; CHECK: # %bb.0: 769; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 770; CHECK-NEXT: retq 771 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 772 ret <2 x double> %res 773} 774define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { 775; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0: 776; CHECK: # %bb.0: 777; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 778; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 779; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] 780; CHECK-NEXT: vmovapd %xmm2, %xmm0 781; CHECK-NEXT: retq 782 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 
x i32> <i32 0, i32 2> 783 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 784 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 785 ret <2 x double> %res 786} 787 788define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { 789; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: 790; CHECK: # %bb.0: 791; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 792; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 793; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] 794; CHECK-NEXT: retq 795 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 796 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 797 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 798 ret <2 x double> %res 799} 800define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { 801; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1: 802; CHECK: # %bb.0: 803; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 804; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 805; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] 806; CHECK-NEXT: vmovapd %xmm2, %xmm0 807; CHECK-NEXT: retq 808 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 809 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 810 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 811 ret <2 x double> %res 812} 813 814define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { 815; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: 816; CHECK: # %bb.0: 817; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 818; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 819; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] 820; CHECK-NEXT: retq 821 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, 
i32 2> 822 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 823 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 824 ret <2 x double> %res 825} 826define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 827; CHECK-LABEL: test_2xdouble_unpack_low_mem_mask0: 828; CHECK: # %bb.0: 829; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] 830; CHECK-NEXT: retq 831 %vec2 = load <2 x double>, <2 x double>* %vec2p 832 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 833 ret <2 x double> %res 834} 835define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { 836; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: 837; CHECK: # %bb.0: 838; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 839; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 840; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] 841; CHECK-NEXT: vmovapd %xmm1, %xmm0 842; CHECK-NEXT: retq 843 %vec2 = load <2 x double>, <2 x double>* %vec2p 844 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 845 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 846 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 847 ret <2 x double> %res 848} 849 850define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { 851; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: 852; CHECK: # %bb.0: 853; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 854; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 855; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] 856; CHECK-NEXT: retq 857 %vec2 = load <2 x double>, <2 x double>* %vec2p 858 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 859 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 860 %res = select <2 x i1> %cmp, <2 x 
double> %shuf, <2 x double> zeroinitializer 861 ret <2 x double> %res 862} 863 864define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { 865; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: 866; CHECK: # %bb.0: 867; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 868; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 869; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] 870; CHECK-NEXT: vmovapd %xmm1, %xmm0 871; CHECK-NEXT: retq 872 %vec2 = load <2 x double>, <2 x double>* %vec2p 873 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 874 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 875 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 876 ret <2 x double> %res 877} 878 879define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { 880; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: 881; CHECK: # %bb.0: 882; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 883; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 884; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] 885; CHECK-NEXT: retq 886 %vec2 = load <2 x double>, <2 x double>* %vec2p 887 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 888 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 889 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 890 ret <2 x double> %res 891} 892 893define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) { 894; CHECK-LABEL: test_4xdouble_unpack_low_mask0: 895; CHECK: # %bb.0: 896; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 897; CHECK-NEXT: retq 898 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 899 ret <4 x double> %res 900} 901define <4 x double> 
@test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 902; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0: 903; CHECK: # %bb.0: 904; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 905; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 906; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 907; CHECK-NEXT: vmovapd %ymm2, %ymm0 908; CHECK-NEXT: retq 909 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 910 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 911 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 912 ret <4 x double> %res 913} 914 915define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 916; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: 917; CHECK: # %bb.0: 918; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 919; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 920; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 921; CHECK-NEXT: retq 922 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 923 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 924 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 925 ret <4 x double> %res 926} 927define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 928; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1: 929; CHECK: # %bb.0: 930; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 931; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 932; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 933; CHECK-NEXT: vmovapd %ymm2, %ymm0 934; CHECK-NEXT: retq 935 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 936 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 937 %res = 
select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 938 ret <4 x double> %res 939} 940 941define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 942; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: 943; CHECK: # %bb.0: 944; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 945; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 946; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 947; CHECK-NEXT: retq 948 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 949 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 950 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 951 ret <4 x double> %res 952} 953define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 954; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2: 955; CHECK: # %bb.0: 956; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 957; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 958; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 959; CHECK-NEXT: vmovapd %ymm2, %ymm0 960; CHECK-NEXT: retq 961 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 962 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 963 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 964 ret <4 x double> %res 965} 966 967define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 968; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: 969; CHECK: # %bb.0: 970; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 971; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 972; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 973; CHECK-NEXT: retq 974 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 
2, i32 6> 975 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 976 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 977 ret <4 x double> %res 978} 979define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) { 980; CHECK-LABEL: test_4xdouble_unpack_low_mask3: 981; CHECK: # %bb.0: 982; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 983; CHECK-NEXT: retq 984 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 985 ret <4 x double> %res 986} 987define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { 988; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3: 989; CHECK: # %bb.0: 990; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 991; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 992; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 993; CHECK-NEXT: vmovapd %ymm2, %ymm0 994; CHECK-NEXT: retq 995 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 996 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 997 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 998 ret <4 x double> %res 999} 1000 1001define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { 1002; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: 1003; CHECK: # %bb.0: 1004; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1005; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 1006; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 1007; CHECK-NEXT: retq 1008 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 1009 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 1010 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 1011 ret <4 x double> %res 1012} 
; ---------------------------------------------------------------------------
; <4 x double> low unpack, second source loaded through %vec2p.
; The shuffle mask interleaves elements 0 and 2 of %vec1 with elements 0 and 2
; of the loaded vector; the expected assembly shows the load used directly as
; the instruction's memory operand ("mem[...]").
;   * "unpack"             - plain shuffle.
;   * "masked_unpack"      - lanes where %mask compares ordered-equal to zero
;                            take the shuffle, the others keep %vec3.
;   * "zero_masked_unpack" - same predicate, the other lanes become zero.
; NOTE(review): the mask0..mask3 variants in this section differ only by name.
; Layout restored to one statement / one directive per line so that the IR is
; no longer hidden behind the ';' of a preceding comment.
; ---------------------------------------------------------------------------

define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; <8 x double> low unpack, register operands.
; The shuffle mask interleaves the even-indexed elements (0, 2, 4, 6) of
; %vec1 with the even-indexed elements of %vec2.
; ---------------------------------------------------------------------------

define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_unpack_low_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x double> %res
}

define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}
1194 1195define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { 1196; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: 1197; CHECK: # %bb.0: 1198; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1199; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1200; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1201; CHECK-NEXT: retq 1202 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1203 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1204 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1205 ret <8 x double> %res 1206} 1207define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { 1208; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2: 1209; CHECK: # %bb.0: 1210; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 1211; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 1212; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1213; CHECK-NEXT: vmovapd %zmm2, %zmm0 1214; CHECK-NEXT: retq 1215 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1216 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1217 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 1218 ret <8 x double> %res 1219} 1220 1221define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { 1222; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: 1223; CHECK: # %bb.0: 1224; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1225; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1226; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1227; CHECK-NEXT: 
retq 1228 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1229 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1230 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1231 ret <8 x double> %res 1232} 1233define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) { 1234; CHECK-LABEL: test_8xdouble_unpack_low_mask3: 1235; CHECK: # %bb.0: 1236; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1237; CHECK-NEXT: retq 1238 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1239 ret <8 x double> %res 1240} 1241define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { 1242; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3: 1243; CHECK: # %bb.0: 1244; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 1245; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 1246; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1247; CHECK-NEXT: vmovapd %zmm2, %zmm0 1248; CHECK-NEXT: retq 1249 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1250 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1251 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 1252 ret <8 x double> %res 1253} 1254 1255define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { 1256; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: 1257; CHECK: # %bb.0: 1258; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1259; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1260; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1261; CHECK-NEXT: retq 1262 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1263 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1264 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1265 ret <8 x double> %res 1266} 1267define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { 1268; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask0: 1269; CHECK: # %bb.0: 1270; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1271; CHECK-NEXT: retq 1272 %vec2 = load <8 x double>, <8 x double>* %vec2p 1273 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1274 ret <8 x double> %res 1275} 1276define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { 1277; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: 1278; CHECK: # %bb.0: 1279; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1280; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1281; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1282; CHECK-NEXT: vmovapd %zmm1, %zmm0 1283; CHECK-NEXT: retq 1284 %vec2 = load <8 x double>, <8 x double>* %vec2p 1285 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1286 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1287 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 1288 ret <8 x double> %res 1289} 1290 1291define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { 1292; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: 1293; CHECK: # 
%bb.0: 1294; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1295; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 1296; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1297; CHECK-NEXT: retq 1298 %vec2 = load <8 x double>, <8 x double>* %vec2p 1299 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1300 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1301 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1302 ret <8 x double> %res 1303} 1304 1305define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { 1306; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: 1307; CHECK: # %bb.0: 1308; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1309; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1310; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1311; CHECK-NEXT: vmovapd %zmm1, %zmm0 1312; CHECK-NEXT: retq 1313 %vec2 = load <8 x double>, <8 x double>* %vec2p 1314 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1315 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1316 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 1317 ret <8 x double> %res 1318} 1319 1320define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { 1321; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: 1322; CHECK: # %bb.0: 1323; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1324; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 1325; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1326; CHECK-NEXT: retq 1327 %vec2 = load <8 x double>, <8 x double>* %vec2p 1328 %shuf = shufflevector <8 x 
double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1329 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1330 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1331 ret <8 x double> %res 1332} 1333 1334define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { 1335; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: 1336; CHECK: # %bb.0: 1337; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1338; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1339; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1340; CHECK-NEXT: vmovapd %zmm1, %zmm0 1341; CHECK-NEXT: retq 1342 %vec2 = load <8 x double>, <8 x double>* %vec2p 1343 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1344 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1345 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 1346 ret <8 x double> %res 1347} 1348 1349define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { 1350; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: 1351; CHECK: # %bb.0: 1352; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1353; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 1354; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1355; CHECK-NEXT: retq 1356 %vec2 = load <8 x double>, <8 x double>* %vec2p 1357 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1358 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1359 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1360 ret <8 x double> %res 1361} 1362 1363define <8 x double> 
@test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { 1364; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask3: 1365; CHECK: # %bb.0: 1366; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1367; CHECK-NEXT: retq 1368 %vec2 = load <8 x double>, <8 x double>* %vec2p 1369 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1370 ret <8 x double> %res 1371} 1372define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { 1373; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: 1374; CHECK: # %bb.0: 1375; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 1376; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 1377; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1378; CHECK-NEXT: vmovapd %zmm1, %zmm0 1379; CHECK-NEXT: retq 1380 %vec2 = load <8 x double>, <8 x double>* %vec2p 1381 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1382 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1383 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 1384 ret <8 x double> %res 1385} 1386 1387define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { 1388; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: 1389; CHECK: # %bb.0: 1390; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1391; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 1392; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1393; CHECK-NEXT: retq 1394 %vec2 = load <8 x double>, <8 x double>* %vec2p 1395 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 
14> 1396 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 1397 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 1398 ret <8 x double> %res 1399} 1400 1401define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) { 1402; CHECK-LABEL: test_4xfloat_unpack_high_mask0: 1403; CHECK: # %bb.0: 1404; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1405; CHECK-NEXT: retq 1406 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1407 ret <4 x float> %res 1408} 1409define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { 1410; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask0: 1411; CHECK: # %bb.0: 1412; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1413; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 1414; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1415; CHECK-NEXT: vmovaps %xmm2, %xmm0 1416; CHECK-NEXT: retq 1417 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1418 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1419 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1420 ret <4 x float> %res 1421} 1422 1423define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { 1424; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: 1425; CHECK: # %bb.0: 1426; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1427; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1428; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1429; CHECK-NEXT: retq 1430 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1431 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1432 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1433 ret <4 x float> %res 1434} 1435define 
<4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { 1436; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1: 1437; CHECK: # %bb.0: 1438; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1439; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 1440; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1441; CHECK-NEXT: vmovaps %xmm2, %xmm0 1442; CHECK-NEXT: retq 1443 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1444 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1445 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1446 ret <4 x float> %res 1447} 1448 1449define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { 1450; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: 1451; CHECK: # %bb.0: 1452; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1453; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1454; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1455; CHECK-NEXT: retq 1456 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1457 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1458 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1459 ret <4 x float> %res 1460} 1461define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { 1462; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2: 1463; CHECK: # %bb.0: 1464; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1465; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 1466; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1467; CHECK-NEXT: vmovaps %xmm2, %xmm0 1468; CHECK-NEXT: retq 1469 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1470 %cmp = fcmp oeq <4 x float> %mask, 
zeroinitializer 1471 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1472 ret <4 x float> %res 1473} 1474 1475define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { 1476; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: 1477; CHECK: # %bb.0: 1478; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1479; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1480; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1481; CHECK-NEXT: retq 1482 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1483 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1484 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1485 ret <4 x float> %res 1486} 1487define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) { 1488; CHECK-LABEL: test_4xfloat_unpack_high_mask3: 1489; CHECK: # %bb.0: 1490; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1491; CHECK-NEXT: retq 1492 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1493 ret <4 x float> %res 1494} 1495define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { 1496; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3: 1497; CHECK: # %bb.0: 1498; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1499; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 1500; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1501; CHECK-NEXT: vmovaps %xmm2, %xmm0 1502; CHECK-NEXT: retq 1503 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1504 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1505 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1506 ret <4 x float> %res 1507} 1508 1509define <4 x float> 
@test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { 1510; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: 1511; CHECK: # %bb.0: 1512; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1513; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1514; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1515; CHECK-NEXT: retq 1516 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1517 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1518 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1519 ret <4 x float> %res 1520} 1521define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { 1522; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask0: 1523; CHECK: # %bb.0: 1524; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] 1525; CHECK-NEXT: retq 1526 %vec2 = load <4 x float>, <4 x float>* %vec2p 1527 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1528 ret <4 x float> %res 1529} 1530define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { 1531; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: 1532; CHECK: # %bb.0: 1533; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1534; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1535; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] 1536; CHECK-NEXT: vmovaps %xmm1, %xmm0 1537; CHECK-NEXT: retq 1538 %vec2 = load <4 x float>, <4 x float>* %vec2p 1539 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1540 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1541 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1542 ret <4 x float> %res 1543} 1544 1545define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x 
float>* %vec2p, <4 x float> %mask) { 1546; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: 1547; CHECK: # %bb.0: 1548; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1549; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 1550; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] 1551; CHECK-NEXT: retq 1552 %vec2 = load <4 x float>, <4 x float>* %vec2p 1553 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1554 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1555 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1556 ret <4 x float> %res 1557} 1558 1559define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { 1560; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: 1561; CHECK: # %bb.0: 1562; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1563; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1564; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] 1565; CHECK-NEXT: vmovaps %xmm1, %xmm0 1566; CHECK-NEXT: retq 1567 %vec2 = load <4 x float>, <4 x float>* %vec2p 1568 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1569 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1570 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1571 ret <4 x float> %res 1572} 1573 1574define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { 1575; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: 1576; CHECK: # %bb.0: 1577; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1578; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 1579; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] 1580; CHECK-NEXT: retq 1581 %vec2 = load <4 x float>, <4 x float>* %vec2p 1582 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, 
i32 7> 1583 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1584 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1585 ret <4 x float> %res 1586} 1587 1588define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { 1589; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: 1590; CHECK: # %bb.0: 1591; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1592; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1593; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] 1594; CHECK-NEXT: vmovaps %xmm1, %xmm0 1595; CHECK-NEXT: retq 1596 %vec2 = load <4 x float>, <4 x float>* %vec2p 1597 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1598 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1599 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1600 ret <4 x float> %res 1601} 1602 1603define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { 1604; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: 1605; CHECK: # %bb.0: 1606; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1607; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 1608; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] 1609; CHECK-NEXT: retq 1610 %vec2 = load <4 x float>, <4 x float>* %vec2p 1611 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1612 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1613 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1614 ret <4 x float> %res 1615} 1616 1617define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { 1618; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask3: 1619; CHECK: # %bb.0: 1620; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] 1621; CHECK-NEXT: retq 1622 %vec2 = 
load <4 x float>, <4 x float>* %vec2p 1623 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1624 ret <4 x float> %res 1625} 1626define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { 1627; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: 1628; CHECK: # %bb.0: 1629; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1630; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 1631; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] 1632; CHECK-NEXT: vmovaps %xmm1, %xmm0 1633; CHECK-NEXT: retq 1634 %vec2 = load <4 x float>, <4 x float>* %vec2p 1635 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1636 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1637 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 1638 ret <4 x float> %res 1639} 1640 1641define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { 1642; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: 1643; CHECK: # %bb.0: 1644; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1645; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 1646; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] 1647; CHECK-NEXT: retq 1648 %vec2 = load <4 x float>, <4 x float>* %vec2p 1649 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1650 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer 1651 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 1652 ret <4 x float> %res 1653} 1654 1655define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) { 1656; CHECK-LABEL: test_8xfloat_unpack_high_mask0: 1657; CHECK: # %bb.0: 1658; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1659; CHECK-NEXT: retq 
1660 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1661 ret <8 x float> %res 1662} 1663define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { 1664; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0: 1665; CHECK: # %bb.0: 1666; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1667; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 1668; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1669; CHECK-NEXT: vmovaps %ymm2, %ymm0 1670; CHECK-NEXT: retq 1671 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1672 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1673 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1674 ret <8 x float> %res 1675} 1676 1677define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { 1678; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: 1679; CHECK: # %bb.0: 1680; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1681; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1682; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1683; CHECK-NEXT: retq 1684 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1685 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1686 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1687 ret <8 x float> %res 1688} 1689define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { 1690; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1: 1691; CHECK: # %bb.0: 1692; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1693; CHECK-NEXT: vcmpeqps %ymm4, 
%ymm3, %k1 1694; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1695; CHECK-NEXT: vmovaps %ymm2, %ymm0 1696; CHECK-NEXT: retq 1697 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1698 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1699 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1700 ret <8 x float> %res 1701} 1702 1703define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { 1704; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: 1705; CHECK: # %bb.0: 1706; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1707; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1708; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1709; CHECK-NEXT: retq 1710 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1711 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1712 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1713 ret <8 x float> %res 1714} 1715define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { 1716; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2: 1717; CHECK: # %bb.0: 1718; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1719; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 1720; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1721; CHECK-NEXT: vmovaps %ymm2, %ymm0 1722; CHECK-NEXT: retq 1723 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1724 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1725 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1726 ret <8 x 
float> %res 1727} 1728 1729define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { 1730; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: 1731; CHECK: # %bb.0: 1732; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1733; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1734; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1735; CHECK-NEXT: retq 1736 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1737 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1738 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1739 ret <8 x float> %res 1740} 1741define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) { 1742; CHECK-LABEL: test_8xfloat_unpack_high_mask3: 1743; CHECK: # %bb.0: 1744; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1745; CHECK-NEXT: retq 1746 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1747 ret <8 x float> %res 1748} 1749define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { 1750; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3: 1751; CHECK: # %bb.0: 1752; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1753; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 1754; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1755; CHECK-NEXT: vmovaps %ymm2, %ymm0 1756; CHECK-NEXT: retq 1757 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1758 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1759 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1760 ret <8 x 
float> %res 1761} 1762 1763define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { 1764; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: 1765; CHECK: # %bb.0: 1766; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1767; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1768; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1769; CHECK-NEXT: retq 1770 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1771 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1772 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1773 ret <8 x float> %res 1774} 1775define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { 1776; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask0: 1777; CHECK: # %bb.0: 1778; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1779; CHECK-NEXT: retq 1780 %vec2 = load <8 x float>, <8 x float>* %vec2p 1781 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1782 ret <8 x float> %res 1783} 1784define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { 1785; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: 1786; CHECK: # %bb.0: 1787; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1788; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1789; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1790; CHECK-NEXT: vmovaps %ymm1, %ymm0 1791; CHECK-NEXT: retq 1792 %vec2 = load <8 x float>, <8 x float>* %vec2p 1793 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1794 %cmp = fcmp oeq <8 x 
float> %mask, zeroinitializer 1795 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1796 ret <8 x float> %res 1797} 1798 1799define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { 1800; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: 1801; CHECK: # %bb.0: 1802; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1803; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1804; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1805; CHECK-NEXT: retq 1806 %vec2 = load <8 x float>, <8 x float>* %vec2p 1807 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1808 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1809 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1810 ret <8 x float> %res 1811} 1812 1813define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { 1814; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: 1815; CHECK: # %bb.0: 1816; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1817; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1818; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1819; CHECK-NEXT: vmovaps %ymm1, %ymm0 1820; CHECK-NEXT: retq 1821 %vec2 = load <8 x float>, <8 x float>* %vec2p 1822 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1823 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1824 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1825 ret <8 x float> %res 1826} 1827 1828define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { 1829; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: 1830; 
CHECK: # %bb.0: 1831; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1832; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1833; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1834; CHECK-NEXT: retq 1835 %vec2 = load <8 x float>, <8 x float>* %vec2p 1836 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1837 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1838 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1839 ret <8 x float> %res 1840} 1841 1842define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { 1843; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: 1844; CHECK: # %bb.0: 1845; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1846; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1847; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1848; CHECK-NEXT: vmovaps %ymm1, %ymm0 1849; CHECK-NEXT: retq 1850 %vec2 = load <8 x float>, <8 x float>* %vec2p 1851 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1852 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1853 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1854 ret <8 x float> %res 1855} 1856 1857define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { 1858; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: 1859; CHECK: # %bb.0: 1860; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1861; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1862; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1863; CHECK-NEXT: retq 1864 %vec2 = load <8 x float>, <8 x float>* %vec2p 1865 %shuf = shufflevector <8 x float> %vec1, <8 x 
float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1866 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1867 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1868 ret <8 x float> %res 1869} 1870 1871define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { 1872; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask3: 1873; CHECK: # %bb.0: 1874; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1875; CHECK-NEXT: retq 1876 %vec2 = load <8 x float>, <8 x float>* %vec2p 1877 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1878 ret <8 x float> %res 1879} 1880define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { 1881; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: 1882; CHECK: # %bb.0: 1883; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1884; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 1885; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1886; CHECK-NEXT: vmovaps %ymm1, %ymm0 1887; CHECK-NEXT: retq 1888 %vec2 = load <8 x float>, <8 x float>* %vec2p 1889 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1890 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1891 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 1892 ret <8 x float> %res 1893} 1894 1895define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { 1896; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: 1897; CHECK: # %bb.0: 1898; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1899; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 1900; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = 
ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 1901; CHECK-NEXT: retq 1902 %vec2 = load <8 x float>, <8 x float>* %vec2p 1903 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1904 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1905 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1906 ret <8 x float> %res 1907} 1908 1909define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) { 1910; CHECK-LABEL: test_16xfloat_unpack_high_mask0: 1911; CHECK: # %bb.0: 1912; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1913; CHECK-NEXT: retq 1914 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1915 ret <16 x float> %res 1916} 1917define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { 1918; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask0: 1919; CHECK: # %bb.0: 1920; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1921; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 1922; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1923; CHECK-NEXT: vmovaps %zmm2, %zmm0 1924; CHECK-NEXT: retq 1925 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1926 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1927 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 1928 ret <16 x float> %res 1929} 1930 
1931define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { 1932; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: 1933; CHECK: # %bb.0: 1934; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1935; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 1936; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1937; CHECK-NEXT: retq 1938 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1939 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1940 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1941 ret <16 x float> %res 1942} 1943define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { 1944; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask1: 1945; CHECK: # %bb.0: 1946; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1947; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 1948; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1949; CHECK-NEXT: vmovaps %zmm2, %zmm0 1950; CHECK-NEXT: retq 1951 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1952 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1953 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 1954 ret <16 x float> %res 1955} 1956 1957define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { 1958; CHECK-LABEL: 
test_16xfloat_zero_masked_unpack_high_mask1: 1959; CHECK: # %bb.0: 1960; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1961; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 1962; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1963; CHECK-NEXT: retq 1964 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1965 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1966 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1967 ret <16 x float> %res 1968} 1969define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { 1970; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask2: 1971; CHECK: # %bb.0: 1972; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1973; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 1974; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1975; CHECK-NEXT: vmovaps %zmm2, %zmm0 1976; CHECK-NEXT: retq 1977 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1978 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1979 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 1980 ret <16 x float> %res 1981} 1982 1983define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { 1984; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: 1985; CHECK: # %bb.0: 1986; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1987; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 1988; 
CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1989; CHECK-NEXT: retq 1990 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 1991 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1992 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1993 ret <16 x float> %res 1994} 1995define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) { 1996; CHECK-LABEL: test_16xfloat_unpack_high_mask3: 1997; CHECK: # %bb.0: 1998; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1999; CHECK-NEXT: retq 2000 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2001 ret <16 x float> %res 2002} 2003define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { 2004; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask3: 2005; CHECK: # %bb.0: 2006; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 2007; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 2008; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 2009; CHECK-NEXT: vmovaps %zmm2, %zmm0 2010; CHECK-NEXT: retq 2011 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2012 %cmp = fcmp oeq <16 x float> 
%mask, zeroinitializer 2013 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 2014 ret <16 x float> %res 2015} 2016 2017define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { 2018; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: 2019; CHECK: # %bb.0: 2020; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 2021; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 2022; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 2023; CHECK-NEXT: retq 2024 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2025 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2026 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 2027 ret <16 x float> %res 2028} 2029define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 2030; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask0: 2031; CHECK: # %bb.0: 2032; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2033; CHECK-NEXT: retq 2034 %vec2 = load <16 x float>, <16 x float>* %vec2p 2035 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2036 ret <16 x float> %res 2037} 2038define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 2039; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: 2040; CHECK: # %bb.0: 2041; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 2042; CHECK-NEXT: 
vcmpeqps %zmm3, %zmm2, %k1 2043; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2044; CHECK-NEXT: vmovaps %zmm1, %zmm0 2045; CHECK-NEXT: retq 2046 %vec2 = load <16 x float>, <16 x float>* %vec2p 2047 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2048 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2049 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 2050 ret <16 x float> %res 2051} 2052 2053define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 2054; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: 2055; CHECK: # %bb.0: 2056; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 2057; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 2058; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2059; CHECK-NEXT: retq 2060 %vec2 = load <16 x float>, <16 x float>* %vec2p 2061 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2062 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2063 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 2064 ret <16 x float> %res 2065} 2066 2067define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 2068; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: 2069; CHECK: # %bb.0: 2070; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 2071; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 2072; CHECK-NEXT: 
vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2073; CHECK-NEXT: vmovaps %zmm1, %zmm0 2074; CHECK-NEXT: retq 2075 %vec2 = load <16 x float>, <16 x float>* %vec2p 2076 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2077 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2078 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 2079 ret <16 x float> %res 2080} 2081 2082define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 2083; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: 2084; CHECK: # %bb.0: 2085; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 2086; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 2087; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2088; CHECK-NEXT: retq 2089 %vec2 = load <16 x float>, <16 x float>* %vec2p 2090 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2091 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2092 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 2093 ret <16 x float> %res 2094} 2095 2096define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 2097; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: 2098; CHECK: # %bb.0: 2099; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 2100; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 2101; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = 
zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2102; CHECK-NEXT: vmovaps %zmm1, %zmm0 2103; CHECK-NEXT: retq 2104 %vec2 = load <16 x float>, <16 x float>* %vec2p 2105 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2106 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2107 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 2108 ret <16 x float> %res 2109} 2110 2111define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 2112; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: 2113; CHECK: # %bb.0: 2114; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 2115; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 2116; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2117; CHECK-NEXT: retq 2118 %vec2 = load <16 x float>, <16 x float>* %vec2p 2119 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2120 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2121 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 2122 ret <16 x float> %res 2123} 2124 2125define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 2126; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask3: 2127; CHECK: # %bb.0: 2128; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2129; CHECK-NEXT: retq 2130 %vec2 = load <16 x float>, <16 x float>* 
%vec2p 2131 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2132 ret <16 x float> %res 2133} 2134define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { 2135; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: 2136; CHECK: # %bb.0: 2137; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 2138; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 2139; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2140; CHECK-NEXT: vmovaps %zmm1, %zmm0 2141; CHECK-NEXT: retq 2142 %vec2 = load <16 x float>, <16 x float>* %vec2p 2143 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 2144 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2145 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 2146 ret <16 x float> %res 2147} 2148 2149define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { 2150; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: 2151; CHECK: # %bb.0: 2152; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 2153; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 2154; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] 2155; CHECK-NEXT: retq 2156 %vec2 = load <16 x float>, <16 x float>* %vec2p 2157 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 
15, i32 31> 2158 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 2159 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 2160 ret <16 x float> %res 2161} 2162 2163define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) { 2164; CHECK-LABEL: test_2xdouble_unpack_high_mask0: 2165; CHECK: # %bb.0: 2166; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] 2167; CHECK-NEXT: retq 2168 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2169 ret <2 x double> %res 2170} 2171define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { 2172; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0: 2173; CHECK: # %bb.0: 2174; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 2175; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 2176; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] 2177; CHECK-NEXT: vmovapd %xmm2, %xmm0 2178; CHECK-NEXT: retq 2179 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2180 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2181 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 2182 ret <2 x double> %res 2183} 2184 2185define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { 2186; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: 2187; CHECK: # %bb.0: 2188; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2189; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 2190; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] 2191; CHECK-NEXT: retq 2192 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2193 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2194 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 2195 ret <2 x double> %res 2196} 2197define <2 x double> 
@test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { 2198; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1: 2199; CHECK: # %bb.0: 2200; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 2201; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 2202; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] 2203; CHECK-NEXT: vmovapd %xmm2, %xmm0 2204; CHECK-NEXT: retq 2205 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2206 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2207 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 2208 ret <2 x double> %res 2209} 2210 2211define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { 2212; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: 2213; CHECK: # %bb.0: 2214; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2215; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 2216; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] 2217; CHECK-NEXT: retq 2218 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2219 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2220 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 2221 ret <2 x double> %res 2222} 2223define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 2224; CHECK-LABEL: test_2xdouble_unpack_high_mem_mask0: 2225; CHECK: # %bb.0: 2226; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] 2227; CHECK-NEXT: retq 2228 %vec2 = load <2 x double>, <2 x double>* %vec2p 2229 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2230 ret <2 x double> %res 2231} 2232define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { 2233; CHECK-LABEL: 
test_2xdouble_masked_unpack_high_mem_mask0: 2234; CHECK: # %bb.0: 2235; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2236; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 2237; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] 2238; CHECK-NEXT: vmovapd %xmm1, %xmm0 2239; CHECK-NEXT: retq 2240 %vec2 = load <2 x double>, <2 x double>* %vec2p 2241 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2242 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2243 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 2244 ret <2 x double> %res 2245} 2246 2247define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { 2248; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: 2249; CHECK: # %bb.0: 2250; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2251; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 2252; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] 2253; CHECK-NEXT: retq 2254 %vec2 = load <2 x double>, <2 x double>* %vec2p 2255 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2256 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2257 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 2258 ret <2 x double> %res 2259} 2260 2261define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { 2262; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: 2263; CHECK: # %bb.0: 2264; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2265; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 2266; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] 2267; CHECK-NEXT: vmovapd %xmm1, %xmm0 2268; CHECK-NEXT: retq 2269 %vec2 = load <2 x double>, <2 x double>* %vec2p 2270 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 2271 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer 2272 %res = 
select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
  ret <2 x double> %res
}

; Zero-masked high unpack of <2 x double> with the second operand loaded from
; %vec2p. The shuffle mask <1, 3> yields { vec1[1], vec2[1] }; lanes whose
; %mask element is not ordered-equal to 0.0 are replaced by zero. The expected
; assembly folds the load into a single vunpckhpd with {%k1} {z}.
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
; CHECK-NEXT: retq
  %vec2 = load <2 x double>, <2 x double>* %vec2p
  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
  ret <2 x double> %res
}

; ---------------------------------------------------------------------------
; <4 x double> high unpack, both operands in registers.
;
; Every function in this group uses the shuffle mask <1, 5, 3, 7>, i.e. the
; result is { vec1[1], vec2[1], vec1[3], vec2[3] }, and the expected assembly
; is a single 256-bit vunpckhpd.
;   * "masked" variants keep the shuffle result in lanes where %mask is
;     ordered-equal to 0.0 and take %vec3 elsewhere (write mask {%k1}).
;   * "zero_masked" variants take zeroinitializer elsewhere ({%k1} {z}).
; In the expected assembly the fcmp shows up as vxorpd (zero register) followed
; by vcmpeqpd into %k1.
; The maskN suffix does not change the IR in this group: functions that share
; a prefix have identical bodies and differ only in name.
; ---------------------------------------------------------------------------

; Unmasked: the whole shuffle result is returned.
define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_unpack_high_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}

; Merge-masked: lanes whose %mask element is not oeq 0.0 come from %vec3.
define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked: lanes whose %mask element is not oeq 0.0 are zero.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Merge-masked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Merge-masked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Unmasked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_unpack_high_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}

; Merge-masked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked; body identical to the mask0 variant.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; <4 x double> high unpack, second operand loaded from memory.
;
; Same shuffle mask <1, 5, 3, 7> and the same masked / zero_masked selects as
; the register group, but %vec2 is loaded from %vec2p. The expected assembly
; folds that load into vunpckhpd (the mem[...] operands). With one fewer vector
; argument, the expected register numbers differ from the register group.
; As before, the maskN suffix does not change the IR.
; ---------------------------------------------------------------------------

; Unmasked, memory operand.
define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}

; Merge-masked, memory operand: unselected lanes come from %vec3.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, memory operand: unselected lanes are zero.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Merge-masked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Merge-masked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Unmasked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}

; Merge-masked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, memory operand; body identical to the mem_mask0 variant.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; <8 x double> high unpack, both operands in registers.
;
; Every function in this group uses the shuffle mask
; <1, 9, 3, 11, 5, 13, 7, 15>, i.e. the result is
; { vec1[1], vec2[1], vec1[3], vec2[3], vec1[5], vec2[5], vec1[7], vec2[7] },
; and the expected assembly is a single 512-bit vunpckhpd.
; "masked" variants merge with %vec3 and "zero_masked" variants merge with
; zeroinitializer, in both cases under an fcmp oeq of %mask against 0.0.
; The maskN suffix does not change the IR in this group.
; ---------------------------------------------------------------------------

; Unmasked: the whole shuffle result is returned.
define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_unpack_high_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}

; Merge-masked: lanes whose %mask element is not oeq 0.0 come from %vec3.
define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked: lanes whose %mask element is not oeq 0.0 are zero.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Unmasked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_unpack_high_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}

; Merge-masked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked; body identical to the mask0 variant.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; ---------------------------------------------------------------------------
; <8 x double> high unpack, second operand loaded from memory.
;
; Same shuffle mask <1, 9, 3, 11, 5, 13, 7, 15> and the same masked /
; zero_masked selects as the register group, but %vec2 is loaded from %vec2p.
; The expected assembly folds that load into vunpckhpd (the mem[...] operands).
; The maskN suffix does not change the IR in this group.
; ---------------------------------------------------------------------------

; Unmasked, memory operand.
define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}

; Merge-masked, memory operand: unselected lanes come from %vec3.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, memory operand: unselected lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Unmasked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}

; Merge-masked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, memory operand; body identical to the mem_mask0 variant.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}