; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s

; FIXME: 128-bit shuffles of 256-bit vectors cases should be fixed by PR34359

; Every shufflevector in this file moves whole 128-bit lanes. A shuffle mask is
; exercised in up to three forms:
;   * shuff             - the bare shufflevector.
;   * masked_shuff      - elements where %mask compares oeq to zero take the
;                         shuffle result, all other elements come from %vec3.
;   * zero_masked_shuff - as above, but the other elements are zeroinitializer.
; The *_mem_* variants load the second shuffle operand through %vec2p.
; Group comments name the 128-bit lanes selected by the shuffle mask, counted
; from 0; "vec1[1]" is the second 128-bit lane of %vec1, "mem[...]" refers to
; the loaded operand.

; ---- <8 x float>, register operands ----

; mask0: vec1[1], vec2[0]
define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; mask1: vec1[1], vec2[0]
define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; mask2: vec1[1], vec2[1]
define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; mask3: vec1[1], vec2[0]
define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; ---- <8 x float>, second operand loaded from memory ----

; mem_mask0: vec1[1], mem[1]
define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; mem_mask1: vec1[1], mem[1]
define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; mem_mask2: vec1[1], mem[0]
define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; mem_mask3: vec1[1], mem[0]
define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT:    retq
  %vec2 = load <8 x float>, <8 x float>* %vec2p
  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; ---- <16 x float>, register operands ----

; mask0: vec1[3,0], vec2[1,3]
define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2) {
; CHECK-LABEL: test_16xfloat_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; mask1: vec1[0,2], vec2[0,3]
define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; mask2: vec1[3,1], vec2[0,1]
define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; mask3: vec1[2,3], vec2[0,2]
define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) {
; CHECK-LABEL: test_16xfloat_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; ---- <16 x float>, second operand loaded from memory ----

; mem_mask0: vec1[3,2], mem[2,1]
define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; CHECK-LABEL: test_16xfloat_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3]
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; mem_mask1: vec1[2,1], mem[2,1]
define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; mem_mask2: vec1[0,0], mem[2,2]
define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; mem_mask3: vec1[1,0], mem[3,3]
define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; CHECK-LABEL: test_16xfloat_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7]
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
; CHECK-NEXT:    retq
  %vec2 = load <16 x float>, <16 x float>* %vec2p
  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; ---- <4 x double>, register operands ----

; mask0: vec1[1], vec2[0]
define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; mask1: vec1[1], vec2[0]
define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; mask2: vec1[1], vec2[1]
define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; mask3: vec1[1], vec2[1]
define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_shuff_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x double> %res
}
; ---------------------------------------------------------------------------
; <4 x double>, both shuffle operands in registers, index pattern <2,3,6,7>
; (upper two elements of %vec1 followed by upper two elements of %vec2).
; ---------------------------------------------------------------------------

; Merge-masked form: lanes whose %mask element compares equal to zero
; (fcmp oeq) take the shuffle result, every other lane keeps %vec3.
define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked form: lanes whose %mask element compares equal to zero take the
; shuffle result, every other lane is zero.
define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; <4 x double>, second shuffle operand loaded from %vec2p. The expected
; assembly uses a memory operand ("mem[...]") for that source.
; ---------------------------------------------------------------------------

; Unmasked, indices <2,3,6,7>: upper half of %vec1, upper half of the load.
define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x double> %res
}

; Merge-masked, indices <2,3,6,7>; unselected lanes keep %vec3.
define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, indices <2,3,6,7>; unselected lanes are zero.
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Merge-masked, indices <2,3,4,5>: upper half of %vec1, lower half of the load.
define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, indices <2,3,4,5>.
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Merge-masked, indices <2,3,4,5> (same index pattern as the mem_mask1 case).
define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, indices <2,3,4,5> (same index pattern as the mem_mask1 case).
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; Unmasked, indices <2,3,6,7>.
define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x double> %res
}

; Merge-masked, indices <2,3,6,7>.
define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Zero-masked, indices <2,3,6,7>.
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
  %vec2 = load <4 x double>, <4 x double>* %vec2p
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; <8 x double>, both shuffle operands in registers. Every index vector moves
; elements in adjacent pairs: the first four result elements come from %vec1
; (indices 0-7), the last four from %vec2 (indices 8-15).
; ---------------------------------------------------------------------------

; Unmasked, indices <6,7,2,3,14,15,8,9>.
define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_shuff_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1]
; CHECK-NEXT: retq
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
  ret <8 x double> %res
}

; Merge-masked, indices <6,7,2,3,14,15,8,9>; unselected lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <6,7,2,3,14,15,8,9>; unselected lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked, indices <0,1,4,5,8,9,12,13>.
define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <0,1,4,5,8,9,12,13>.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked, indices <6,7,4,5,12,13,8,9>.
define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <6,7,4,5,12,13,8,9>.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Unmasked, indices <4,5,4,5,12,13,10,11>.
define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3]
; CHECK-NEXT: retq
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
  ret <8 x double> %res
}

; Merge-masked, indices <4,5,4,5,12,13,10,11>.
define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <4,5,4,5,12,13,10,11>.
define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; ---------------------------------------------------------------------------
; <8 x double>, second shuffle operand loaded from %vec2p. The expected
; assembly uses a memory operand ("mem[...]") for that source.
; ---------------------------------------------------------------------------

; Unmasked, indices <6,7,0,1,8,9,8,9>.
define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
  ret <8 x double> %res
}

; Merge-masked, indices <6,7,0,1,8,9,8,9>; unselected lanes keep %vec3.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <6,7,0,1,8,9,8,9>; unselected lanes are zero.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked, indices <6,7,6,7,8,9,10,11>.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <6,7,6,7,8,9,10,11>.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked, indices <0,1,2,3,8,9,12,13>.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <0,1,2,3,8,9,12,13>.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Unmasked, indices <2,3,0,1,12,13,8,9>.
define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
  ret <8 x double> %res
}

; Merge-masked, indices <2,3,0,1,12,13,8,9>.
define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked, indices <2,3,0,1,12,13,8,9>.
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1]
; CHECK-NEXT: retq
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
; ---------------------------------------------------------------------------
; <8 x i32>, both shuffle operands in registers. Each index vector takes
; elements 4-7 of %vec1 followed by either elements 4-7 of %vec2 (indices
; 12-15) or elements 0-3 of %vec2 (indices 8-11). Masked variants select on
; "icmp eq %mask, 0".
; ---------------------------------------------------------------------------

; Unmasked, indices <4,5,6,7,12,13,14,15>.
define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) {
; CHECK-LABEL: test_8xi32_shuff_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
  %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %res
}

; Merge-masked, indices <4,5,6,7,12,13,14,15>: lanes whose %mask element is
; zero take the shuffle result, every other lane keeps %vec3.
define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_masked_shuff_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
  ret <8 x i32> %res
}

; Zero-masked, indices <4,5,6,7,12,13,14,15>: lanes whose %mask element is
; zero take the shuffle result, every other lane is zero.
define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Merge-masked, indices <4,5,6,7,8,9,10,11>.
define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_masked_shuff_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
  ret <8 x i32> %res
}

; Zero-masked, indices <4,5,6,7,8,9,10,11>.
define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Merge-masked, indices <4,5,6,7,12,13,14,15> (same pattern as mask0).
define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_masked_shuff_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
  ret <8 x i32> %res
}

; Zero-masked, indices <4,5,6,7,12,13,14,15> (same pattern as mask0).
define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Unmasked, indices <4,5,6,7,8,9,10,11>.
define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) {
; CHECK-LABEL: test_8xi32_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
  %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i32> %res
}

; Merge-masked, indices <4,5,6,7,8,9,10,11>.
define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
  ret <8 x i32> %res
}

; Zero-masked, indices <4,5,6,7,8,9,10,11>.
define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
1134define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { 1135; CHECK-LABEL: test_8xi32_shuff_mem_mask0: 1136; CHECK: # %bb.0: 1137; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 1138; CHECK-NEXT: retq 1139 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1140 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 1141 ret <8 x i32> %res 1142} 1143define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 1144; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask0: 1145; CHECK: # %bb.0: 1146; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 1147; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] 1148; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1149; CHECK-NEXT: retq 1150 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1151 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 1152 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1153 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 1154 ret <8 x i32> %res 1155} 1156 1157define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 1158; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: 1159; CHECK: # %bb.0: 1160; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1161; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] 1162; CHECK-NEXT: retq 1163 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1164 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 1165 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1166 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1167 ret <8 x i32> %res 1168} 1169 1170define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 
1171; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask1: 1172; CHECK: # %bb.0: 1173; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 1174; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] 1175; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1176; CHECK-NEXT: retq 1177 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1178 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1179 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1180 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 1181 ret <8 x i32> %res 1182} 1183 1184define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 1185; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: 1186; CHECK: # %bb.0: 1187; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1188; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] 1189; CHECK-NEXT: retq 1190 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1191 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1192 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1193 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1194 ret <8 x i32> %res 1195} 1196 1197define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 1198; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask2: 1199; CHECK: # %bb.0: 1200; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 1201; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] 1202; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1203; CHECK-NEXT: retq 1204 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1205 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1206 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1207 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 1208 ret <8 x i32> %res 
1209} 1210 1211define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 1212; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: 1213; CHECK: # %bb.0: 1214; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1215; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] 1216; CHECK-NEXT: retq 1217 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1218 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1219 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1220 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1221 ret <8 x i32> %res 1222} 1223 1224define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { 1225; CHECK-LABEL: test_8xi32_shuff_mem_mask3: 1226; CHECK: # %bb.0: 1227; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] 1228; CHECK-NEXT: retq 1229 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1230 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1231 ret <8 x i32> %res 1232} 1233define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 1234; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask3: 1235; CHECK: # %bb.0: 1236; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 1237; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] 1238; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1239; CHECK-NEXT: retq 1240 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1241 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1242 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1243 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 1244 ret <8 x i32> %res 1245} 1246 1247define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 1248; 
CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: 1249; CHECK: # %bb.0: 1250; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1251; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] 1252; CHECK-NEXT: retq 1253 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 1254 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1255 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1256 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1257 ret <8 x i32> %res 1258} 1259 1260define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { 1261; CHECK-LABEL: test_16xi32_shuff_mask0: 1262; CHECK: # %bb.0: 1263; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] 1264; CHECK-NEXT: retq 1265 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 1266 ret <16 x i32> %res 1267} 1268define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 1269; CHECK-LABEL: test_16xi32_masked_shuff_mask0: 1270; CHECK: # %bb.0: 1271; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1 1272; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] 1273; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 1274; CHECK-NEXT: retq 1275 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 1276 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1277 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1278 ret <16 x i32> %res 1279} 1280 1281define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 1282; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask0: 1283; CHECK: # %bb.0: 
1284; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1285; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] 1286; CHECK-NEXT: retq 1287 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 1288 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1289 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1290 ret <16 x i32> %res 1291} 1292define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 1293; CHECK-LABEL: test_16xi32_masked_shuff_mask1: 1294; CHECK: # %bb.0: 1295; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1 1296; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] 1297; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 1298; CHECK-NEXT: retq 1299 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 1300 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1301 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1302 ret <16 x i32> %res 1303} 1304 1305define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 1306; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask1: 1307; CHECK: # %bb.0: 1308; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1309; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] 1310; CHECK-NEXT: retq 1311 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 1312 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1313 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> 
zeroinitializer 1314 ret <16 x i32> %res 1315} 1316define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 1317; CHECK-LABEL: test_16xi32_masked_shuff_mask2: 1318; CHECK: # %bb.0: 1319; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1 1320; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] 1321; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 1322; CHECK-NEXT: retq 1323 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> 1324 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1325 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1326 ret <16 x i32> %res 1327} 1328 1329define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 1330; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask2: 1331; CHECK: # %bb.0: 1332; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1333; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] 1334; CHECK-NEXT: retq 1335 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> 1336 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1337 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1338 ret <16 x i32> %res 1339} 1340define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { 1341; CHECK-LABEL: test_16xi32_shuff_mask3: 1342; CHECK: # %bb.0: 1343; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] 1344; CHECK-NEXT: retq 1345 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 1346 ret <16 x i32> 
%res 1347} 1348define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 1349; CHECK-LABEL: test_16xi32_masked_shuff_mask3: 1350; CHECK: # %bb.0: 1351; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1 1352; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] 1353; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 1354; CHECK-NEXT: retq 1355 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 1356 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1357 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1358 ret <16 x i32> %res 1359} 1360 1361define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 1362; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask3: 1363; CHECK: # %bb.0: 1364; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1365; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] 1366; CHECK-NEXT: retq 1367 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 1368 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1369 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1370 ret <16 x i32> %res 1371} 1372define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { 1373; CHECK-LABEL: test_16xi32_shuff_mem_mask0: 1374; CHECK: # %bb.0: 1375; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] 1376; CHECK-NEXT: retq 1377 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1378 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 
1379 ret <16 x i32> %res 1380} 1381define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 1382; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask0: 1383; CHECK: # %bb.0: 1384; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1385; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] 1386; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1387; CHECK-NEXT: retq 1388 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1389 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 1390 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1391 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1392 ret <16 x i32> %res 1393} 1394 1395define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 1396; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: 1397; CHECK: # %bb.0: 1398; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1399; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] 1400; CHECK-NEXT: retq 1401 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1402 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 1403 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1404 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1405 ret <16 x i32> %res 1406} 1407 1408define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 1409; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask1: 1410; CHECK: # %bb.0: 1411; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1412; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] 
1413; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1414; CHECK-NEXT: retq 1415 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1416 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 1417 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1418 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1419 ret <16 x i32> %res 1420} 1421 1422define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 1423; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: 1424; CHECK: # %bb.0: 1425; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1426; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] 1427; CHECK-NEXT: retq 1428 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1429 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 1430 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1431 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1432 ret <16 x i32> %res 1433} 1434 1435define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 1436; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask2: 1437; CHECK: # %bb.0: 1438; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1439; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] 1440; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1441; CHECK-NEXT: retq 1442 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1443 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 1444 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1445 %res = select 
<16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1446 ret <16 x i32> %res 1447} 1448 1449define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 1450; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: 1451; CHECK: # %bb.0: 1452; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1453; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] 1454; CHECK-NEXT: retq 1455 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1456 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 1457 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1458 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1459 ret <16 x i32> %res 1460} 1461 1462define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { 1463; CHECK-LABEL: test_16xi32_shuff_mem_mask3: 1464; CHECK: # %bb.0: 1465; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] 1466; CHECK-NEXT: retq 1467 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1468 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 1469 ret <16 x i32> %res 1470} 1471define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 1472; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask3: 1473; CHECK: # %bb.0: 1474; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1475; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] 1476; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1477; CHECK-NEXT: retq 1478 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1479 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, 
i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 1480 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1481 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 1482 ret <16 x i32> %res 1483} 1484 1485define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 1486; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: 1487; CHECK: # %bb.0: 1488; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1489; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] 1490; CHECK-NEXT: retq 1491 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 1492 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 1493 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1494 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1495 ret <16 x i32> %res 1496} 1497 1498define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) { 1499; CHECK-LABEL: test_4xi64_shuff_mask0: 1500; CHECK: # %bb.0: 1501; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1502; CHECK-NEXT: retq 1503 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1504 ret <4 x i64> %res 1505} 1506define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 1507; CHECK-LABEL: test_4xi64_masked_shuff_mask0: 1508; CHECK: # %bb.0: 1509; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1 1510; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] 1511; CHECK-NEXT: vmovdqa %ymm2, %ymm0 1512; CHECK-NEXT: retq 1513 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1514 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1515 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1516 ret <4 x 
i64> %res 1517} 1518 1519define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 1520; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask0: 1521; CHECK: # %bb.0: 1522; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1523; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] 1524; CHECK-NEXT: retq 1525 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1526 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1527 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1528 ret <4 x i64> %res 1529} 1530define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 1531; CHECK-LABEL: test_4xi64_masked_shuff_mask1: 1532; CHECK: # %bb.0: 1533; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1 1534; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] 1535; CHECK-NEXT: vmovdqa %ymm2, %ymm0 1536; CHECK-NEXT: retq 1537 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1538 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1539 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1540 ret <4 x i64> %res 1541} 1542 1543define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 1544; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask1: 1545; CHECK: # %bb.0: 1546; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1547; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] 1548; CHECK-NEXT: retq 1549 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1550 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1551 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1552 ret <4 x i64> %res 1553} 1554define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 1555; CHECK-LABEL: 
test_4xi64_masked_shuff_mask2: 1556; CHECK: # %bb.0: 1557; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1 1558; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] 1559; CHECK-NEXT: vmovdqa %ymm2, %ymm0 1560; CHECK-NEXT: retq 1561 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1562 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1563 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1564 ret <4 x i64> %res 1565} 1566 1567define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 1568; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask2: 1569; CHECK: # %bb.0: 1570; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1571; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] 1572; CHECK-NEXT: retq 1573 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1574 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1575 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1576 ret <4 x i64> %res 1577} 1578define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) { 1579; CHECK-LABEL: test_4xi64_shuff_mask3: 1580; CHECK: # %bb.0: 1581; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1582; CHECK-NEXT: retq 1583 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1584 ret <4 x i64> %res 1585} 1586define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 1587; CHECK-LABEL: test_4xi64_masked_shuff_mask3: 1588; CHECK: # %bb.0: 1589; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1 1590; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] 1591; CHECK-NEXT: vmovdqa %ymm2, %ymm0 1592; CHECK-NEXT: retq 1593 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1594 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1595 %res = select <4 x i1> 
%cmp, <4 x i64> %shuf, <4 x i64> %vec3 1596 ret <4 x i64> %res 1597} 1598 1599define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 1600; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask3: 1601; CHECK: # %bb.0: 1602; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1603; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] 1604; CHECK-NEXT: retq 1605 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1606 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1607 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1608 ret <4 x i64> %res 1609} 1610define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { 1611; CHECK-LABEL: test_4xi64_shuff_mem_mask0: 1612; CHECK: # %bb.0: 1613; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 1614; CHECK-NEXT: retq 1615 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1616 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1617 ret <4 x i64> %res 1618} 1619define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 1620; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask0: 1621; CHECK: # %bb.0: 1622; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1623; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] 1624; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1625; CHECK-NEXT: retq 1626 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1627 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1628 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1629 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1630 ret <4 x i64> %res 1631} 1632 1633define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 1634; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: 1635; CHECK: # %bb.0: 1636; CHECK-NEXT: vptestnmq %ymm1, 
%ymm1, %k1 1637; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] 1638; CHECK-NEXT: retq 1639 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1640 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1641 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1642 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1643 ret <4 x i64> %res 1644} 1645 1646define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 1647; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask1: 1648; CHECK: # %bb.0: 1649; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1650; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] 1651; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1652; CHECK-NEXT: retq 1653 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1654 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1655 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1656 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1657 ret <4 x i64> %res 1658} 1659 1660define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 1661; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: 1662; CHECK: # %bb.0: 1663; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1664; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] 1665; CHECK-NEXT: retq 1666 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1667 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1668 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1669 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1670 ret <4 x i64> %res 1671} 1672 1673define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 1674; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask2: 1675; CHECK: # %bb.0: 1676; CHECK-NEXT: vptestnmq %ymm2, 
%ymm2, %k1 1677; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] 1678; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1679; CHECK-NEXT: retq 1680 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1681 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1682 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1683 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1684 ret <4 x i64> %res 1685} 1686 1687define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 1688; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: 1689; CHECK: # %bb.0: 1690; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1691; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] 1692; CHECK-NEXT: retq 1693 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1694 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1695 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1696 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1697 ret <4 x i64> %res 1698} 1699 1700define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { 1701; CHECK-LABEL: test_4xi64_shuff_mem_mask3: 1702; CHECK: # %bb.0: 1703; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 1704; CHECK-NEXT: retq 1705 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1706 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1707 ret <4 x i64> %res 1708} 1709define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 1710; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask3: 1711; CHECK: # %bb.0: 1712; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1713; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] 1714; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1715; CHECK-NEXT: retq 1716 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1717 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> 
%vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1718 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1719 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 1720 ret <4 x i64> %res 1721} 1722 1723define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 1724; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: 1725; CHECK: # %bb.0: 1726; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1727; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] 1728; CHECK-NEXT: retq 1729 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 1730 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1731 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1732 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1733 ret <4 x i64> %res 1734} 1735 1736define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { 1737; CHECK-LABEL: test_8xi64_shuff_mask0: 1738; CHECK: # %bb.0: 1739; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] 1740; CHECK-NEXT: retq 1741 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 1742 ret <8 x i64> %res 1743} 1744define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 1745; CHECK-LABEL: test_8xi64_masked_shuff_mask0: 1746; CHECK: # %bb.0: 1747; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1 1748; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] 1749; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 1750; CHECK-NEXT: retq 1751 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 1752 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1753 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 1754 ret <8 x i64> %res 1755} 1756 1757define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, 
<8 x i64> %vec2, <8 x i64> %mask) { 1758; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask0: 1759; CHECK: # %bb.0: 1760; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1761; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] 1762; CHECK-NEXT: retq 1763 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 1764 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1765 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1766 ret <8 x i64> %res 1767} 1768define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 1769; CHECK-LABEL: test_8xi64_masked_shuff_mask1: 1770; CHECK: # %bb.0: 1771; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1 1772; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] 1773; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 1774; CHECK-NEXT: retq 1775 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> 1776 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1777 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 1778 ret <8 x i64> %res 1779} 1780 1781define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 1782; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask1: 1783; CHECK: # %bb.0: 1784; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1785; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] 1786; CHECK-NEXT: retq 1787 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> 1788 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1789 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1790 ret <8 x i64> %res 1791} 1792define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 1793; CHECK-LABEL: 
test_8xi64_masked_shuff_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1]
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
  ret <8 x i64> %res
}

; Zero-masked register/register shuffle, pattern 2: lanes whose %mask element
; equals zero receive the shuffled value, all remaining lanes are zeroed.
define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1]
; CHECK-NEXT: retq
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Unmasked register/register shuffle, pattern 3: low half of the result comes
; from %vec1 (indices 0-7), high half from %vec2 (indices 8-15).
define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) {
; CHECK-LABEL: test_8xi64_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3]
; CHECK-NEXT: retq
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
  ret <8 x i64> %shuffled
}

; Merge-masked form of pattern 3: lanes whose %mask element equals zero take
; the shuffled value, the others pass %vec3 through.
define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3]
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> %vec3
  ret <8 x i64> %out
}

; Zero-masked form of pattern 3: unselected lanes become zero instead of
; passing a third vector through.
define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3]
; CHECK-NEXT: retq
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Unmasked register/memory shuffle, pattern 0: the second operand is loaded
; from %vec2p, and the expected assembly uses it directly as a memory operand.
define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
; CHECK-LABEL: test_8xi64_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3]
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
  ret <8 x i64> %shuffled
}

; Merge-masked register/memory shuffle, pattern 0: lanes whose %mask element
; equals zero take the shuffled value, the others pass %vec3 through.
define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> %vec3
  ret <8 x i64> %out
}

; Zero-masked register/memory shuffle, pattern 0: unselected lanes are zeroed.
define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3]
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Merge-masked register/memory shuffle, pattern 1: both high 128-bit pairs of
; the result repeat elements 0-1 of the loaded vector.
define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> %vec3
  ret <8 x i64> %out
}

; Zero-masked register/memory shuffle, pattern 1: unselected lanes are zeroed.
define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1]
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Merge-masked register/memory shuffle, pattern 2: both high 128-bit pairs of
; the result repeat elements 2-3 of the loaded vector.
define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> %vec3
  ret <8 x i64> %out
}

; Zero-masked register/memory shuffle, pattern 2: unselected lanes are zeroed.
define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3]
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Unmasked register/memory shuffle, pattern 3: the high half of the result is
; built from elements 6-7 and then 2-3 of the loaded vector.
define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
; CHECK-LABEL: test_8xi64_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3]
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
  ret <8 x i64> %shuffled
}

; Merge-masked register/memory shuffle, pattern 3: lanes whose %mask element
; equals zero take the shuffled value, the others pass %vec3 through.
define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> %vec3
  ret <8 x i64> %out
}

; Zero-masked register/memory shuffle, pattern 3: unselected lanes are zeroed.
define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3]
; CHECK-NEXT: retq
  %vec2 = load <8 x i64>, <8 x i64>* %vec2p
  %shuffled = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %shuffled, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}
