; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s

; Scalar-to-vector integer broadcast tests.
;
; Every function inserts the scalar %s into lane 0 of a two-element vector and
; splats lane 0 across the result with an all-zero shuffle mask (written as
; zeroinitializer, which is the same constant as <i32 0, i32 0, ...>).
;   * test_<ty>_to_<n>             - plain broadcast.
;   * test_masked_<ty>_to_<n>_*    - broadcast kept only in lanes where %mask
;                                    is zero; other lanes come from %default.
;   * test_masked_z_<ty>_to_<n>_*  - same predicate, other lanes are zero.
; NOTE(review): the mask0..mask3 variants of each group have identical bodies
; here; they differ only in name.

; ---- i8 -> <16 x i8> (xmm) ----

define <16 x i8> @test_i8_to_16(i8 %s) {
; CHECK-LABEL: test_i8_to_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_i8_to_16_mask0(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_i8_to_16_mask0(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_i8_to_16_mask1(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_i8_to_16_mask1(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_i8_to_16_mask2(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_i8_to_16_mask2(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_i8_to_16_mask3(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_i8_to_16_mask3(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; ---- i8 -> <32 x i8> (ymm) ----

define <32 x i8> @test_i8_to_32(i8 %s) {
; CHECK-LABEL: test_i8_to_32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_i8_to_32_mask0(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_i8_to_32_mask0(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_i8_to_32_mask1(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_i8_to_32_mask1(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_i8_to_32_mask2(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_i8_to_32_mask2(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_i8_to_32_mask3(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_i8_to_32_mask3(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

; ---- i8 -> <64 x i8> (zmm) ----

define <64 x i8> @test_i8_to_64(i8 %s) {
; CHECK-LABEL: test_i8_to_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_i8_to_64_mask0(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_i8_to_64_mask0(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_i8_to_64_mask1(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_i8_to_64_mask1(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_i8_to_64_mask2(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_i8_to_64_mask2(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_i8_to_64_mask3(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_i8_to_64_mask3(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> zeroinitializer
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

; ---- i16 -> <8 x i16> (xmm) ----

define <8 x i16> @test_i16_to_8(i16 %s) {
; CHECK-LABEL: test_i16_to_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_i16_to_8_mask0(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_z_i16_to_8_mask0(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_i16_to_8_mask1(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_z_i16_to_8_mask1(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_i16_to_8_mask2(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_z_i16_to_8_mask2(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_i16_to_8_mask3(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_z_i16_to_8_mask3(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> zeroinitializer
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

; ---- i16 -> <16 x i16> (ymm) ----

define <16 x i16> @test_i16_to_16(i16 %s) {
; CHECK-LABEL: test_i16_to_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_i16_to_16_mask0(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_z_i16_to_16_mask0(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_i16_to_16_mask1(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_z_i16_to_16_mask1(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_i16_to_16_mask2(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_z_i16_to_16_mask2(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_i16_to_16_mask3(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
  ret <16 x i16> %res
}

define <16 x i16> @test_masked_z_i16_to_16_mask3(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> zeroinitializer
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; ---- i16 -> <32 x i16> (zmm) ----

define <32 x i16> @test_i16_to_32(i16 %s) {
; CHECK-LABEL: test_i16_to_32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_i16_to_32_mask0(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_z_i16_to_32_mask0(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_i16_to_32_mask1(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_z_i16_to_32_mask1(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_i16_to_32_mask2(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_z_i16_to_32_mask2(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> zeroinitializer
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

define <32 x i16> @test_masked_i16_to_32_mask3(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
  %shuf = shufflevector <2 x i16> %vec, <2
x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 641 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 642 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default 643 ret <32 x i16> %res 644} 645 646define <32 x i16> @test_masked_z_i16_to_32_mask3(i16 %s, <32 x i16> %mask) { 647; CHECK-LABEL: test_masked_z_i16_to_32_mask3: 648; CHECK: # %bb.0: 649; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 650; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z} 651; CHECK-NEXT: retq 652 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 653 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 654 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 655 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 656 ret <32 x i16> %res 657} 658define <4 x i32> @test_i32_to_4(i32 %s) { 659; CHECK-LABEL: test_i32_to_4: 660; CHECK: # %bb.0: 661; CHECK-NEXT: vpbroadcastd %edi, %xmm0 662; CHECK-NEXT: retq 663 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 664 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 665 ret <4 x i32> %res 666} 667define <4 x i32> @test_masked_i32_to_4_mask0(i32 %s, <4 x i32> %default, <4 x i32> %mask) { 668; CHECK-LABEL: test_masked_i32_to_4_mask0: 669; CHECK: # %bb.0: 670; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 671; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} 672; CHECK-NEXT: retq 673 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 674 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 675 %cmp = icmp eq <4 x i32> %mask, 
zeroinitializer 676 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 677 ret <4 x i32> %res 678} 679 680define <4 x i32> @test_masked_z_i32_to_4_mask0(i32 %s, <4 x i32> %mask) { 681; CHECK-LABEL: test_masked_z_i32_to_4_mask0: 682; CHECK: # %bb.0: 683; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 684; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z} 685; CHECK-NEXT: retq 686 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 687 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 688 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 689 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 690 ret <4 x i32> %res 691} 692define <4 x i32> @test_masked_i32_to_4_mask1(i32 %s, <4 x i32> %default, <4 x i32> %mask) { 693; CHECK-LABEL: test_masked_i32_to_4_mask1: 694; CHECK: # %bb.0: 695; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 696; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} 697; CHECK-NEXT: retq 698 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 699 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 700 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 701 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 702 ret <4 x i32> %res 703} 704 705define <4 x i32> @test_masked_z_i32_to_4_mask1(i32 %s, <4 x i32> %mask) { 706; CHECK-LABEL: test_masked_z_i32_to_4_mask1: 707; CHECK: # %bb.0: 708; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 709; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z} 710; CHECK-NEXT: retq 711 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 712 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 713 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 714 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 715 ret <4 x i32> %res 716} 717define <4 x i32> @test_masked_i32_to_4_mask2(i32 %s, <4 x i32> %default, <4 x i32> %mask) { 718; CHECK-LABEL: test_masked_i32_to_4_mask2: 719; 
CHECK: # %bb.0: 720; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 721; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} 722; CHECK-NEXT: retq 723 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 724 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 725 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 726 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 727 ret <4 x i32> %res 728} 729 730define <4 x i32> @test_masked_z_i32_to_4_mask2(i32 %s, <4 x i32> %mask) { 731; CHECK-LABEL: test_masked_z_i32_to_4_mask2: 732; CHECK: # %bb.0: 733; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 734; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z} 735; CHECK-NEXT: retq 736 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 737 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 738 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 739 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 740 ret <4 x i32> %res 741} 742define <4 x i32> @test_masked_i32_to_4_mask3(i32 %s, <4 x i32> %default, <4 x i32> %mask) { 743; CHECK-LABEL: test_masked_i32_to_4_mask3: 744; CHECK: # %bb.0: 745; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 746; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} 747; CHECK-NEXT: retq 748 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 749 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 750 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 751 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 752 ret <4 x i32> %res 753} 754 755define <4 x i32> @test_masked_z_i32_to_4_mask3(i32 %s, <4 x i32> %mask) { 756; CHECK-LABEL: test_masked_z_i32_to_4_mask3: 757; CHECK: # %bb.0: 758; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 759; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z} 760; CHECK-NEXT: retq 761 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 762 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 
0, i32 0, i32 0> 763 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 764 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 765 ret <4 x i32> %res 766} 767define <8 x i32> @test_i32_to_8(i32 %s) { 768; CHECK-LABEL: test_i32_to_8: 769; CHECK: # %bb.0: 770; CHECK-NEXT: vpbroadcastd %edi, %ymm0 771; CHECK-NEXT: retq 772 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 773 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 774 ret <8 x i32> %res 775} 776define <8 x i32> @test_masked_i32_to_8_mask0(i32 %s, <8 x i32> %default, <8 x i32> %mask) { 777; CHECK-LABEL: test_masked_i32_to_8_mask0: 778; CHECK: # %bb.0: 779; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 780; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} 781; CHECK-NEXT: retq 782 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 783 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 784 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 785 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 786 ret <8 x i32> %res 787} 788 789define <8 x i32> @test_masked_z_i32_to_8_mask0(i32 %s, <8 x i32> %mask) { 790; CHECK-LABEL: test_masked_z_i32_to_8_mask0: 791; CHECK: # %bb.0: 792; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 793; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z} 794; CHECK-NEXT: retq 795 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 796 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 797 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 798 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 799 ret <8 x i32> %res 800} 801define <8 x i32> @test_masked_i32_to_8_mask1(i32 %s, <8 x i32> %default, <8 x i32> %mask) { 802; CHECK-LABEL: test_masked_i32_to_8_mask1: 803; CHECK: # %bb.0: 804; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 805; CHECK-NEXT: vpbroadcastd 
%edi, %ymm0 {%k1} 806; CHECK-NEXT: retq 807 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 808 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 809 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 810 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 811 ret <8 x i32> %res 812} 813 814define <8 x i32> @test_masked_z_i32_to_8_mask1(i32 %s, <8 x i32> %mask) { 815; CHECK-LABEL: test_masked_z_i32_to_8_mask1: 816; CHECK: # %bb.0: 817; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 818; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z} 819; CHECK-NEXT: retq 820 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 821 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 822 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 823 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 824 ret <8 x i32> %res 825} 826define <8 x i32> @test_masked_i32_to_8_mask2(i32 %s, <8 x i32> %default, <8 x i32> %mask) { 827; CHECK-LABEL: test_masked_i32_to_8_mask2: 828; CHECK: # %bb.0: 829; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 830; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} 831; CHECK-NEXT: retq 832 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 833 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 834 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 835 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 836 ret <8 x i32> %res 837} 838 839define <8 x i32> @test_masked_z_i32_to_8_mask2(i32 %s, <8 x i32> %mask) { 840; CHECK-LABEL: test_masked_z_i32_to_8_mask2: 841; CHECK: # %bb.0: 842; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 843; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z} 844; CHECK-NEXT: retq 845 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 846 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0> 847 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 848 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 849 ret <8 x i32> %res 850} 851define <8 x i32> @test_masked_i32_to_8_mask3(i32 %s, <8 x i32> %default, <8 x i32> %mask) { 852; CHECK-LABEL: test_masked_i32_to_8_mask3: 853; CHECK: # %bb.0: 854; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 855; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} 856; CHECK-NEXT: retq 857 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 858 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 859 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 860 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 861 ret <8 x i32> %res 862} 863 864define <8 x i32> @test_masked_z_i32_to_8_mask3(i32 %s, <8 x i32> %mask) { 865; CHECK-LABEL: test_masked_z_i32_to_8_mask3: 866; CHECK: # %bb.0: 867; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 868; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z} 869; CHECK-NEXT: retq 870 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 871 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 872 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 873 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 874 ret <8 x i32> %res 875} 876define <16 x i32> @test_i32_to_16(i32 %s) { 877; CHECK-LABEL: test_i32_to_16: 878; CHECK: # %bb.0: 879; CHECK-NEXT: vpbroadcastd %edi, %zmm0 880; CHECK-NEXT: retq 881 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 882 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 883 ret <16 x i32> %res 884} 885define <16 x i32> @test_masked_i32_to_16_mask0(i32 %s, <16 x i32> %default, <16 x i32> %mask) { 886; CHECK-LABEL: test_masked_i32_to_16_mask0: 887; CHECK: 
# %bb.0: 888; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 889; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} 890; CHECK-NEXT: retq 891 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 892 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 893 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 894 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 895 ret <16 x i32> %res 896} 897 898define <16 x i32> @test_masked_z_i32_to_16_mask0(i32 %s, <16 x i32> %mask) { 899; CHECK-LABEL: test_masked_z_i32_to_16_mask0: 900; CHECK: # %bb.0: 901; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 902; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z} 903; CHECK-NEXT: retq 904 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 905 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 906 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 907 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 908 ret <16 x i32> %res 909} 910define <16 x i32> @test_masked_i32_to_16_mask1(i32 %s, <16 x i32> %default, <16 x i32> %mask) { 911; CHECK-LABEL: test_masked_i32_to_16_mask1: 912; CHECK: # %bb.0: 913; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 914; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} 915; CHECK-NEXT: retq 916 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 917 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 918 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 919 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 920 ret <16 x i32> %res 921} 922 923define <16 x i32> @test_masked_z_i32_to_16_mask1(i32 %s, <16 x i32> %mask) { 924; CHECK-LABEL: test_masked_z_i32_to_16_mask1: 
925; CHECK: # %bb.0: 926; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 927; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z} 928; CHECK-NEXT: retq 929 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 930 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 931 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 932 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 933 ret <16 x i32> %res 934} 935define <16 x i32> @test_masked_i32_to_16_mask2(i32 %s, <16 x i32> %default, <16 x i32> %mask) { 936; CHECK-LABEL: test_masked_i32_to_16_mask2: 937; CHECK: # %bb.0: 938; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 939; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} 940; CHECK-NEXT: retq 941 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 942 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 943 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 944 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 945 ret <16 x i32> %res 946} 947 948define <16 x i32> @test_masked_z_i32_to_16_mask2(i32 %s, <16 x i32> %mask) { 949; CHECK-LABEL: test_masked_z_i32_to_16_mask2: 950; CHECK: # %bb.0: 951; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 952; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z} 953; CHECK-NEXT: retq 954 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 955 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 956 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 957 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 958 ret <16 x i32> %res 959} 960define <16 x i32> @test_masked_i32_to_16_mask3(i32 %s, <16 x i32> %default, <16 x i32> %mask) { 961; 
CHECK-LABEL: test_masked_i32_to_16_mask3: 962; CHECK: # %bb.0: 963; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 964; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} 965; CHECK-NEXT: retq 966 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 967 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 968 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 969 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 970 ret <16 x i32> %res 971} 972 973define <16 x i32> @test_masked_z_i32_to_16_mask3(i32 %s, <16 x i32> %mask) { 974; CHECK-LABEL: test_masked_z_i32_to_16_mask3: 975; CHECK: # %bb.0: 976; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 977; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z} 978; CHECK-NEXT: retq 979 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 980 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 981 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 982 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 983 ret <16 x i32> %res 984} 985define <2 x i64> @test_i64_to_2(i64 %s) { 986; CHECK-LABEL: test_i64_to_2: 987; CHECK: # %bb.0: 988; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 989; CHECK-NEXT: retq 990 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 991 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 992 ret <2 x i64> %res 993} 994define <2 x i64> @test_masked_i64_to_2_mask0(i64 %s, <2 x i64> %default, <2 x i64> %mask) { 995; CHECK-LABEL: test_masked_i64_to_2_mask0: 996; CHECK: # %bb.0: 997; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 998; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} 999; CHECK-NEXT: retq 1000 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1001 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1002 
%cmp = icmp eq <2 x i64> %mask, zeroinitializer 1003 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default 1004 ret <2 x i64> %res 1005} 1006 1007define <2 x i64> @test_masked_z_i64_to_2_mask0(i64 %s, <2 x i64> %mask) { 1008; CHECK-LABEL: test_masked_z_i64_to_2_mask0: 1009; CHECK: # %bb.0: 1010; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 1011; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z} 1012; CHECK-NEXT: retq 1013 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1014 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1015 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 1016 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer 1017 ret <2 x i64> %res 1018} 1019define <2 x i64> @test_masked_i64_to_2_mask1(i64 %s, <2 x i64> %default, <2 x i64> %mask) { 1020; CHECK-LABEL: test_masked_i64_to_2_mask1: 1021; CHECK: # %bb.0: 1022; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 1023; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} 1024; CHECK-NEXT: retq 1025 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1026 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1027 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 1028 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default 1029 ret <2 x i64> %res 1030} 1031 1032define <2 x i64> @test_masked_z_i64_to_2_mask1(i64 %s, <2 x i64> %mask) { 1033; CHECK-LABEL: test_masked_z_i64_to_2_mask1: 1034; CHECK: # %bb.0: 1035; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 1036; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z} 1037; CHECK-NEXT: retq 1038 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1039 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1040 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 1041 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer 1042 ret <2 x i64> %res 1043} 1044define <4 x i64> @test_i64_to_4(i64 %s) { 1045; CHECK-LABEL: test_i64_to_4: 1046; CHECK: # %bb.0: 1047; 
CHECK-NEXT: vpbroadcastq %rdi, %ymm0 1048; CHECK-NEXT: retq 1049 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1050 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1051 ret <4 x i64> %res 1052} 1053define <4 x i64> @test_masked_i64_to_4_mask0(i64 %s, <4 x i64> %default, <4 x i64> %mask) { 1054; CHECK-LABEL: test_masked_i64_to_4_mask0: 1055; CHECK: # %bb.0: 1056; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1057; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} 1058; CHECK-NEXT: retq 1059 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1060 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1061 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1062 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 1063 ret <4 x i64> %res 1064} 1065 1066define <4 x i64> @test_masked_z_i64_to_4_mask0(i64 %s, <4 x i64> %mask) { 1067; CHECK-LABEL: test_masked_z_i64_to_4_mask0: 1068; CHECK: # %bb.0: 1069; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1070; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z} 1071; CHECK-NEXT: retq 1072 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1073 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1074 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1075 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1076 ret <4 x i64> %res 1077} 1078define <4 x i64> @test_masked_i64_to_4_mask1(i64 %s, <4 x i64> %default, <4 x i64> %mask) { 1079; CHECK-LABEL: test_masked_i64_to_4_mask1: 1080; CHECK: # %bb.0: 1081; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1082; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} 1083; CHECK-NEXT: retq 1084 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1085 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1086 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1087 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 1088 ret 
<4 x i64> %res 1089} 1090 1091define <4 x i64> @test_masked_z_i64_to_4_mask1(i64 %s, <4 x i64> %mask) { 1092; CHECK-LABEL: test_masked_z_i64_to_4_mask1: 1093; CHECK: # %bb.0: 1094; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1095; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z} 1096; CHECK-NEXT: retq 1097 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1098 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1099 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1100 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1101 ret <4 x i64> %res 1102} 1103define <4 x i64> @test_masked_i64_to_4_mask2(i64 %s, <4 x i64> %default, <4 x i64> %mask) { 1104; CHECK-LABEL: test_masked_i64_to_4_mask2: 1105; CHECK: # %bb.0: 1106; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1107; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} 1108; CHECK-NEXT: retq 1109 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1110 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1111 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1112 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 1113 ret <4 x i64> %res 1114} 1115 1116define <4 x i64> @test_masked_z_i64_to_4_mask2(i64 %s, <4 x i64> %mask) { 1117; CHECK-LABEL: test_masked_z_i64_to_4_mask2: 1118; CHECK: # %bb.0: 1119; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1120; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z} 1121; CHECK-NEXT: retq 1122 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1123 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1124 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1125 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1126 ret <4 x i64> %res 1127} 1128define <4 x i64> @test_masked_i64_to_4_mask3(i64 %s, <4 x i64> %default, <4 x i64> %mask) { 1129; CHECK-LABEL: test_masked_i64_to_4_mask3: 1130; CHECK: # %bb.0: 1131; CHECK-NEXT: vptestnmq %ymm1, 
%ymm1, %k1 1132; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} 1133; CHECK-NEXT: retq 1134 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1135 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1136 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1137 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 1138 ret <4 x i64> %res 1139} 1140 1141define <4 x i64> @test_masked_z_i64_to_4_mask3(i64 %s, <4 x i64> %mask) { 1142; CHECK-LABEL: test_masked_z_i64_to_4_mask3: 1143; CHECK: # %bb.0: 1144; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1145; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z} 1146; CHECK-NEXT: retq 1147 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1148 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1149 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1150 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1151 ret <4 x i64> %res 1152} 1153define <8 x i64> @test_i64_to_8(i64 %s) { 1154; CHECK-LABEL: test_i64_to_8: 1155; CHECK: # %bb.0: 1156; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 1157; CHECK-NEXT: retq 1158 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1159 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1160 ret <8 x i64> %res 1161} 1162define <8 x i64> @test_masked_i64_to_8_mask0(i64 %s, <8 x i64> %default, <8 x i64> %mask) { 1163; CHECK-LABEL: test_masked_i64_to_8_mask0: 1164; CHECK: # %bb.0: 1165; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1166; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} 1167; CHECK-NEXT: retq 1168 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1169 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1170 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1171 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1172 ret <8 x i64> %res 1173} 1174 
1175define <8 x i64> @test_masked_z_i64_to_8_mask0(i64 %s, <8 x i64> %mask) { 1176; CHECK-LABEL: test_masked_z_i64_to_8_mask0: 1177; CHECK: # %bb.0: 1178; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1179; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z} 1180; CHECK-NEXT: retq 1181 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1182 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1183 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1184 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1185 ret <8 x i64> %res 1186} 1187define <8 x i64> @test_masked_i64_to_8_mask1(i64 %s, <8 x i64> %default, <8 x i64> %mask) { 1188; CHECK-LABEL: test_masked_i64_to_8_mask1: 1189; CHECK: # %bb.0: 1190; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1191; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} 1192; CHECK-NEXT: retq 1193 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1194 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1195 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1196 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1197 ret <8 x i64> %res 1198} 1199 1200define <8 x i64> @test_masked_z_i64_to_8_mask1(i64 %s, <8 x i64> %mask) { 1201; CHECK-LABEL: test_masked_z_i64_to_8_mask1: 1202; CHECK: # %bb.0: 1203; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1204; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z} 1205; CHECK-NEXT: retq 1206 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1207 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1208 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1209 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1210 ret <8 x i64> %res 1211} 1212define <8 x i64> @test_masked_i64_to_8_mask2(i64 %s, <8 x i64> %default, <8 x i64> %mask) { 1213; CHECK-LABEL: test_masked_i64_to_8_mask2: 
1214; CHECK: # %bb.0: 1215; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1216; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} 1217; CHECK-NEXT: retq 1218 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1219 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1220 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1221 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1222 ret <8 x i64> %res 1223} 1224 1225define <8 x i64> @test_masked_z_i64_to_8_mask2(i64 %s, <8 x i64> %mask) { 1226; CHECK-LABEL: test_masked_z_i64_to_8_mask2: 1227; CHECK: # %bb.0: 1228; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1229; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z} 1230; CHECK-NEXT: retq 1231 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1232 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1233 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1234 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1235 ret <8 x i64> %res 1236} 1237define <8 x i64> @test_masked_i64_to_8_mask3(i64 %s, <8 x i64> %default, <8 x i64> %mask) { 1238; CHECK-LABEL: test_masked_i64_to_8_mask3: 1239; CHECK: # %bb.0: 1240; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1241; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} 1242; CHECK-NEXT: retq 1243 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 1244 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1245 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1246 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1247 ret <8 x i64> %res 1248} 1249 1250define <8 x i64> @test_masked_z_i64_to_8_mask3(i64 %s, <8 x i64> %mask) { 1251; CHECK-LABEL: test_masked_z_i64_to_8_mask3: 1252; CHECK: # %bb.0: 1253; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1254; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z} 1255; CHECK-NEXT: retq 1256 
%vec = insertelement <2 x i64> undef, i64 %s, i32 0 1257 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1258 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1259 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1260 ret <8 x i64> %res 1261} 1262define <16 x i8> @test_i8_to_16_mem(i8* %p) { 1263; CHECK-LABEL: test_i8_to_16_mem: 1264; CHECK: # %bb.0: 1265; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 1266; CHECK-NEXT: retq 1267 %s = load i8, i8* %p 1268 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1269 %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1270 ret <16 x i8> %res 1271} 1272define <16 x i8> @test_masked_i8_to_16_mem_mask0(i8* %p, <16 x i8> %default, <16 x i8> %mask) { 1273; CHECK-LABEL: test_masked_i8_to_16_mem_mask0: 1274; CHECK: # %bb.0: 1275; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 1276; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} 1277; CHECK-NEXT: retq 1278 %s = load i8, i8* %p 1279 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1280 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1281 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1282 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default 1283 ret <16 x i8> %res 1284} 1285 1286define <16 x i8> @test_masked_z_i8_to_16_mem_mask0(i8* %p, <16 x i8> %mask) { 1287; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask0: 1288; CHECK: # %bb.0: 1289; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 1290; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z} 1291; CHECK-NEXT: retq 1292 %s = load i8, i8* %p 1293 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1294 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1295 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1296 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 1297 ret <16 x i8> %res 1298} 1299define <16 x i8> @test_masked_i8_to_16_mem_mask1(i8* %p, <16 x i8> %default, <16 x i8> %mask) { 1300; CHECK-LABEL: test_masked_i8_to_16_mem_mask1: 1301; CHECK: # %bb.0: 1302; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 1303; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} 1304; CHECK-NEXT: retq 1305 %s = load i8, i8* %p 1306 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1307 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1308 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1309 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default 1310 ret <16 x i8> %res 1311} 1312 1313define <16 x i8> @test_masked_z_i8_to_16_mem_mask1(i8* %p, <16 x i8> %mask) { 1314; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask1: 1315; CHECK: # %bb.0: 1316; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 1317; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z} 1318; CHECK-NEXT: retq 1319 %s = load i8, i8* %p 1320 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1321 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1322 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1323 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 1324 ret <16 x i8> %res 1325} 1326define <16 x i8> @test_masked_i8_to_16_mem_mask2(i8* %p, <16 x i8> %default, <16 x i8> %mask) { 1327; CHECK-LABEL: test_masked_i8_to_16_mem_mask2: 1328; CHECK: # %bb.0: 1329; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 1330; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} 1331; CHECK-NEXT: retq 1332 %s = load i8, i8* %p 1333 %vec = insertelement <2 
x i8> undef, i8 %s, i32 0 1334 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1335 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1336 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default 1337 ret <16 x i8> %res 1338} 1339 1340define <16 x i8> @test_masked_z_i8_to_16_mem_mask2(i8* %p, <16 x i8> %mask) { 1341; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask2: 1342; CHECK: # %bb.0: 1343; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 1344; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z} 1345; CHECK-NEXT: retq 1346 %s = load i8, i8* %p 1347 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1348 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1349 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1350 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 1351 ret <16 x i8> %res 1352} 1353define <16 x i8> @test_masked_i8_to_16_mem_mask3(i8* %p, <16 x i8> %default, <16 x i8> %mask) { 1354; CHECK-LABEL: test_masked_i8_to_16_mem_mask3: 1355; CHECK: # %bb.0: 1356; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 1357; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} 1358; CHECK-NEXT: retq 1359 %s = load i8, i8* %p 1360 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1361 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1362 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1363 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default 1364 ret <16 x i8> %res 1365} 1366 1367define <16 x i8> @test_masked_z_i8_to_16_mem_mask3(i8* %p, <16 x i8> %mask) { 1368; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask3: 1369; CHECK: # %bb.0: 1370; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 1371; 
CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z} 1372; CHECK-NEXT: retq 1373 %s = load i8, i8* %p 1374 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1375 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1376 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 1377 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 1378 ret <16 x i8> %res 1379} 1380define <32 x i8> @test_i8_to_32_mem(i8* %p) { 1381; CHECK-LABEL: test_i8_to_32_mem: 1382; CHECK: # %bb.0: 1383; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 1384; CHECK-NEXT: retq 1385 %s = load i8, i8* %p 1386 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1387 %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1388 ret <32 x i8> %res 1389} 1390define <32 x i8> @test_masked_i8_to_32_mem_mask0(i8* %p, <32 x i8> %default, <32 x i8> %mask) { 1391; CHECK-LABEL: test_masked_i8_to_32_mem_mask0: 1392; CHECK: # %bb.0: 1393; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 1394; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} 1395; CHECK-NEXT: retq 1396 %s = load i8, i8* %p 1397 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1398 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1399 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1400 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default 1401 ret <32 x i8> %res 1402} 1403 1404define <32 x i8> @test_masked_z_i8_to_32_mem_mask0(i8* %p, <32 x i8> %mask) { 1405; 
CHECK-LABEL: test_masked_z_i8_to_32_mem_mask0: 1406; CHECK: # %bb.0: 1407; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 1408; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z} 1409; CHECK-NEXT: retq 1410 %s = load i8, i8* %p 1411 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1412 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1413 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1414 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 1415 ret <32 x i8> %res 1416} 1417define <32 x i8> @test_masked_i8_to_32_mem_mask1(i8* %p, <32 x i8> %default, <32 x i8> %mask) { 1418; CHECK-LABEL: test_masked_i8_to_32_mem_mask1: 1419; CHECK: # %bb.0: 1420; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 1421; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} 1422; CHECK-NEXT: retq 1423 %s = load i8, i8* %p 1424 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1425 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1426 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1427 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default 1428 ret <32 x i8> %res 1429} 1430 1431define <32 x i8> @test_masked_z_i8_to_32_mem_mask1(i8* %p, <32 x i8> %mask) { 1432; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask1: 1433; CHECK: # %bb.0: 1434; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 1435; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z} 1436; CHECK-NEXT: retq 1437 %s = load i8, i8* %p 1438 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1439 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1440 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1441 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 1442 ret <32 x i8> %res 1443} 1444define <32 x i8> @test_masked_i8_to_32_mem_mask2(i8* %p, <32 x i8> %default, <32 x i8> %mask) { 1445; CHECK-LABEL: test_masked_i8_to_32_mem_mask2: 1446; CHECK: # %bb.0: 1447; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 1448; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} 1449; CHECK-NEXT: retq 1450 %s = load i8, i8* %p 1451 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1452 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1453 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1454 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default 1455 ret <32 x i8> %res 1456} 1457 1458define <32 x i8> @test_masked_z_i8_to_32_mem_mask2(i8* %p, <32 x i8> %mask) { 1459; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask2: 1460; CHECK: # %bb.0: 1461; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 1462; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z} 1463; CHECK-NEXT: retq 1464 %s = load i8, i8* %p 1465 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1466 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1467 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1468 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 1469 ret 
<32 x i8> %res 1470} 1471define <32 x i8> @test_masked_i8_to_32_mem_mask3(i8* %p, <32 x i8> %default, <32 x i8> %mask) { 1472; CHECK-LABEL: test_masked_i8_to_32_mem_mask3: 1473; CHECK: # %bb.0: 1474; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 1475; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} 1476; CHECK-NEXT: retq 1477 %s = load i8, i8* %p 1478 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1479 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1480 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1481 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default 1482 ret <32 x i8> %res 1483} 1484 1485define <32 x i8> @test_masked_z_i8_to_32_mem_mask3(i8* %p, <32 x i8> %mask) { 1486; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask3: 1487; CHECK: # %bb.0: 1488; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 1489; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z} 1490; CHECK-NEXT: retq 1491 %s = load i8, i8* %p 1492 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1493 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1494 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 1495 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 1496 ret <32 x i8> %res 1497} 1498define <64 x i8> @test_i8_to_64_mem(i8* %p) { 1499; CHECK-LABEL: test_i8_to_64_mem: 1500; CHECK: # %bb.0: 1501; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 1502; CHECK-NEXT: retq 1503 %s = load i8, i8* %p 1504 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1505 %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1506 ret <64 x i8> %res 1507} 1508define <64 x i8> @test_masked_i8_to_64_mem_mask0(i8* %p, <64 x i8> %default, <64 x i8> %mask) { 1509; CHECK-LABEL: test_masked_i8_to_64_mem_mask0: 1510; CHECK: # %bb.0: 1511; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 1512; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} 1513; CHECK-NEXT: retq 1514 %s = load i8, i8* %p 1515 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1516 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1517 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1518 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default 1519 ret <64 x i8> %res 1520} 1521 1522define <64 x i8> @test_masked_z_i8_to_64_mem_mask0(i8* %p, <64 x i8> %mask) { 1523; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask0: 1524; CHECK: # %bb.0: 1525; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 1526; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z} 1527; CHECK-NEXT: retq 1528 %s = load i8, i8* %p 1529 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1530 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1531 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1532 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 1533 ret <64 x i8> %res 1534} 1535define <64 x i8> @test_masked_i8_to_64_mem_mask1(i8* %p, <64 x i8> %default, <64 x i8> %mask) { 1536; CHECK-LABEL: test_masked_i8_to_64_mem_mask1: 1537; CHECK: # %bb.0: 1538; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 1539; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} 1540; CHECK-NEXT: retq 1541 %s = load i8, i8* %p 1542 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1543 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1544 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1545 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default 1546 ret <64 x i8> %res 1547} 1548 1549define <64 x i8> @test_masked_z_i8_to_64_mem_mask1(i8* %p, <64 x i8> %mask) { 1550; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask1: 1551; CHECK: # %bb.0: 1552; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 1553; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z} 1554; CHECK-NEXT: retq 1555 %s = load i8, i8* %p 1556 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1557 %shuf = 
shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1558 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1559 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 1560 ret <64 x i8> %res 1561} 1562define <64 x i8> @test_masked_i8_to_64_mem_mask2(i8* %p, <64 x i8> %default, <64 x i8> %mask) { 1563; CHECK-LABEL: test_masked_i8_to_64_mem_mask2: 1564; CHECK: # %bb.0: 1565; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 1566; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} 1567; CHECK-NEXT: retq 1568 %s = load i8, i8* %p 1569 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1570 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1571 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1572 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default 1573 ret <64 x i8> %res 1574} 1575 1576define <64 x i8> @test_masked_z_i8_to_64_mem_mask2(i8* %p, <64 x i8> %mask) { 1577; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask2: 1578; CHECK: # %bb.0: 1579; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 1580; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z} 1581; CHECK-NEXT: retq 
1582 %s = load i8, i8* %p 1583 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1584 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1585 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1586 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 1587 ret <64 x i8> %res 1588} 1589define <64 x i8> @test_masked_i8_to_64_mem_mask3(i8* %p, <64 x i8> %default, <64 x i8> %mask) { 1590; CHECK-LABEL: test_masked_i8_to_64_mem_mask3: 1591; CHECK: # %bb.0: 1592; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 1593; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} 1594; CHECK-NEXT: retq 1595 %s = load i8, i8* %p 1596 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1597 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1598 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1599 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default 1600 ret <64 x i8> %res 1601} 1602 1603define <64 x i8> @test_masked_z_i8_to_64_mem_mask3(i8* %p, <64 x i8> %mask) { 1604; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask3: 1605; CHECK: # %bb.0: 1606; CHECK-NEXT: vptestnmb 
%zmm0, %zmm0, %k1 1607; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z} 1608; CHECK-NEXT: retq 1609 %s = load i8, i8* %p 1610 %vec = insertelement <2 x i8> undef, i8 %s, i32 0 1611 %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1612 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 1613 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 1614 ret <64 x i8> %res 1615} 1616define <8 x i16> @test_i16_to_8_mem(i16* %p) { 1617; CHECK-LABEL: test_i16_to_8_mem: 1618; CHECK: # %bb.0: 1619; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 1620; CHECK-NEXT: retq 1621 %s = load i16, i16* %p 1622 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1623 %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1624 ret <8 x i16> %res 1625} 1626define <8 x i16> @test_masked_i16_to_8_mem_mask0(i16* %p, <8 x i16> %default, <8 x i16> %mask) { 1627; CHECK-LABEL: test_masked_i16_to_8_mem_mask0: 1628; CHECK: # %bb.0: 1629; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1630; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} 1631; CHECK-NEXT: retq 1632 %s = load i16, i16* %p 1633 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1634 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1635 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1636 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default 1637 ret <8 x i16> %res 1638} 1639 1640define <8 x i16> @test_masked_z_i16_to_8_mem_mask0(i16* 
%p, <8 x i16> %mask) { 1641; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask0: 1642; CHECK: # %bb.0: 1643; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1644; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z} 1645; CHECK-NEXT: retq 1646 %s = load i16, i16* %p 1647 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1648 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1649 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1650 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1651 ret <8 x i16> %res 1652} 1653define <8 x i16> @test_masked_i16_to_8_mem_mask1(i16* %p, <8 x i16> %default, <8 x i16> %mask) { 1654; CHECK-LABEL: test_masked_i16_to_8_mem_mask1: 1655; CHECK: # %bb.0: 1656; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1657; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} 1658; CHECK-NEXT: retq 1659 %s = load i16, i16* %p 1660 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1661 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1662 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1663 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default 1664 ret <8 x i16> %res 1665} 1666 1667define <8 x i16> @test_masked_z_i16_to_8_mem_mask1(i16* %p, <8 x i16> %mask) { 1668; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask1: 1669; CHECK: # %bb.0: 1670; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1671; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z} 1672; CHECK-NEXT: retq 1673 %s = load i16, i16* %p 1674 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1675 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1676 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1677 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1678 ret <8 x i16> %res 1679} 1680define <8 x i16> @test_masked_i16_to_8_mem_mask2(i16* %p, <8 x i16> %default, <8 x 
i16> %mask) { 1681; CHECK-LABEL: test_masked_i16_to_8_mem_mask2: 1682; CHECK: # %bb.0: 1683; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1684; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} 1685; CHECK-NEXT: retq 1686 %s = load i16, i16* %p 1687 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1688 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1689 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1690 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default 1691 ret <8 x i16> %res 1692} 1693 1694define <8 x i16> @test_masked_z_i16_to_8_mem_mask2(i16* %p, <8 x i16> %mask) { 1695; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask2: 1696; CHECK: # %bb.0: 1697; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1698; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z} 1699; CHECK-NEXT: retq 1700 %s = load i16, i16* %p 1701 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1702 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1703 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1704 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1705 ret <8 x i16> %res 1706} 1707define <8 x i16> @test_masked_i16_to_8_mem_mask3(i16* %p, <8 x i16> %default, <8 x i16> %mask) { 1708; CHECK-LABEL: test_masked_i16_to_8_mem_mask3: 1709; CHECK: # %bb.0: 1710; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1711; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} 1712; CHECK-NEXT: retq 1713 %s = load i16, i16* %p 1714 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1715 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1716 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1717 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default 1718 ret <8 x i16> %res 1719} 1720 1721define <8 x i16> @test_masked_z_i16_to_8_mem_mask3(i16* %p, <8 x i16> %mask) { 1722; CHECK-LABEL: 
test_masked_z_i16_to_8_mem_mask3: 1723; CHECK: # %bb.0: 1724; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1725; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z} 1726; CHECK-NEXT: retq 1727 %s = load i16, i16* %p 1728 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1729 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1730 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1731 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1732 ret <8 x i16> %res 1733} 1734define <16 x i16> @test_i16_to_16_mem(i16* %p) { 1735; CHECK-LABEL: test_i16_to_16_mem: 1736; CHECK: # %bb.0: 1737; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 1738; CHECK-NEXT: retq 1739 %s = load i16, i16* %p 1740 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1741 %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1742 ret <16 x i16> %res 1743} 1744define <16 x i16> @test_masked_i16_to_16_mem_mask0(i16* %p, <16 x i16> %default, <16 x i16> %mask) { 1745; CHECK-LABEL: test_masked_i16_to_16_mem_mask0: 1746; CHECK: # %bb.0: 1747; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1748; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} 1749; CHECK-NEXT: retq 1750 %s = load i16, i16* %p 1751 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1752 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1753 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1754 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default 1755 ret <16 x i16> %res 1756} 1757 1758define <16 x i16> @test_masked_z_i16_to_16_mem_mask0(i16* %p, <16 x i16> %mask) { 1759; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask0: 1760; CHECK: # %bb.0: 1761; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1762; CHECK-NEXT: 
vpbroadcastw (%rdi), %ymm0 {%k1} {z} 1763; CHECK-NEXT: retq 1764 %s = load i16, i16* %p 1765 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1766 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1767 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1768 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1769 ret <16 x i16> %res 1770} 1771define <16 x i16> @test_masked_i16_to_16_mem_mask1(i16* %p, <16 x i16> %default, <16 x i16> %mask) { 1772; CHECK-LABEL: test_masked_i16_to_16_mem_mask1: 1773; CHECK: # %bb.0: 1774; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1775; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} 1776; CHECK-NEXT: retq 1777 %s = load i16, i16* %p 1778 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1779 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1780 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1781 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default 1782 ret <16 x i16> %res 1783} 1784 1785define <16 x i16> @test_masked_z_i16_to_16_mem_mask1(i16* %p, <16 x i16> %mask) { 1786; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask1: 1787; CHECK: # %bb.0: 1788; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1789; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z} 1790; CHECK-NEXT: retq 1791 %s = load i16, i16* %p 1792 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1793 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1794 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1795 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1796 ret <16 x i16> %res 1797} 1798define <16 x i16> 
@test_masked_i16_to_16_mem_mask2(i16* %p, <16 x i16> %default, <16 x i16> %mask) { 1799; CHECK-LABEL: test_masked_i16_to_16_mem_mask2: 1800; CHECK: # %bb.0: 1801; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1802; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} 1803; CHECK-NEXT: retq 1804 %s = load i16, i16* %p 1805 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1806 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1807 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1808 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default 1809 ret <16 x i16> %res 1810} 1811 1812define <16 x i16> @test_masked_z_i16_to_16_mem_mask2(i16* %p, <16 x i16> %mask) { 1813; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask2: 1814; CHECK: # %bb.0: 1815; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1816; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z} 1817; CHECK-NEXT: retq 1818 %s = load i16, i16* %p 1819 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1820 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1821 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1822 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1823 ret <16 x i16> %res 1824} 1825define <16 x i16> @test_masked_i16_to_16_mem_mask3(i16* %p, <16 x i16> %default, <16 x i16> %mask) { 1826; CHECK-LABEL: test_masked_i16_to_16_mem_mask3: 1827; CHECK: # %bb.0: 1828; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1829; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} 1830; CHECK-NEXT: retq 1831 %s = load i16, i16* %p 1832 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1833 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 
1834 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1835 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default 1836 ret <16 x i16> %res 1837} 1838 1839define <16 x i16> @test_masked_z_i16_to_16_mem_mask3(i16* %p, <16 x i16> %mask) { 1840; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask3: 1841; CHECK: # %bb.0: 1842; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1843; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z} 1844; CHECK-NEXT: retq 1845 %s = load i16, i16* %p 1846 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1847 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1848 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1849 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1850 ret <16 x i16> %res 1851} 1852define <32 x i16> @test_i16_to_32_mem(i16* %p) { 1853; CHECK-LABEL: test_i16_to_32_mem: 1854; CHECK: # %bb.0: 1855; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 1856; CHECK-NEXT: retq 1857 %s = load i16, i16* %p 1858 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1859 %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1860 ret <32 x i16> %res 1861} 1862define <32 x i16> @test_masked_i16_to_32_mem_mask0(i16* %p, <32 x i16> %default, <32 x i16> %mask) { 1863; CHECK-LABEL: test_masked_i16_to_32_mem_mask0: 1864; CHECK: # %bb.0: 1865; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1866; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} 1867; CHECK-NEXT: retq 1868 %s = load i16, i16* %p 1869 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1870 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1871 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1872 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default 1873 ret <32 x i16> %res 1874} 1875 1876define <32 x i16> @test_masked_z_i16_to_32_mem_mask0(i16* %p, <32 x i16> %mask) { 1877; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask0: 1878; CHECK: # %bb.0: 1879; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1880; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z} 1881; CHECK-NEXT: retq 1882 %s = load i16, i16* %p 1883 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1884 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1885 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1886 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1887 ret <32 x i16> %res 1888} 1889define <32 x i16> @test_masked_i16_to_32_mem_mask1(i16* %p, <32 x i16> %default, <32 x i16> %mask) { 1890; CHECK-LABEL: test_masked_i16_to_32_mem_mask1: 1891; CHECK: # %bb.0: 1892; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1893; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} 1894; CHECK-NEXT: retq 1895 %s = load i16, i16* %p 1896 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1897 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1898 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1899 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default 1900 ret <32 x i16> %res 1901} 1902 
1903define <32 x i16> @test_masked_z_i16_to_32_mem_mask1(i16* %p, <32 x i16> %mask) { 1904; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask1: 1905; CHECK: # %bb.0: 1906; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1907; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z} 1908; CHECK-NEXT: retq 1909 %s = load i16, i16* %p 1910 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1911 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1912 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1913 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1914 ret <32 x i16> %res 1915} 1916define <32 x i16> @test_masked_i16_to_32_mem_mask2(i16* %p, <32 x i16> %default, <32 x i16> %mask) { 1917; CHECK-LABEL: test_masked_i16_to_32_mem_mask2: 1918; CHECK: # %bb.0: 1919; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1920; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} 1921; CHECK-NEXT: retq 1922 %s = load i16, i16* %p 1923 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1924 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1925 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1926 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default 1927 ret <32 x i16> %res 1928} 1929 1930define <32 x i16> @test_masked_z_i16_to_32_mem_mask2(i16* %p, <32 x i16> %mask) { 1931; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask2: 1932; CHECK: # %bb.0: 1933; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1934; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z} 1935; CHECK-NEXT: retq 1936 %s = load i16, i16* %p 1937 %vec 
= insertelement <2 x i16> undef, i16 %s, i32 0 1938 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1939 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1940 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1941 ret <32 x i16> %res 1942} 1943define <32 x i16> @test_masked_i16_to_32_mem_mask3(i16* %p, <32 x i16> %default, <32 x i16> %mask) { 1944; CHECK-LABEL: test_masked_i16_to_32_mem_mask3: 1945; CHECK: # %bb.0: 1946; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1947; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} 1948; CHECK-NEXT: retq 1949 %s = load i16, i16* %p 1950 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1951 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1952 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1953 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default 1954 ret <32 x i16> %res 1955} 1956 1957define <32 x i16> @test_masked_z_i16_to_32_mem_mask3(i16* %p, <32 x i16> %mask) { 1958; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask3: 1959; CHECK: # %bb.0: 1960; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1961; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z} 1962; CHECK-NEXT: retq 1963 %s = load i16, i16* %p 1964 %vec = insertelement <2 x i16> undef, i16 %s, i32 0 1965 %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0> 1966 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1967 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1968 ret <32 x i16> %res 1969} 1970define <4 x i32> @test_i32_to_4_mem(i32* %p) { 1971; CHECK-LABEL: test_i32_to_4_mem: 1972; CHECK: # %bb.0: 1973; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 1974; CHECK-NEXT: retq 1975 %s = load i32, i32* %p 1976 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 1977 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1978 ret <4 x i32> %res 1979} 1980define <4 x i32> @test_masked_i32_to_4_mem_mask0(i32* %p, <4 x i32> %default, <4 x i32> %mask) { 1981; CHECK-LABEL: test_masked_i32_to_4_mem_mask0: 1982; CHECK: # %bb.0: 1983; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 1984; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} 1985; CHECK-NEXT: retq 1986 %s = load i32, i32* %p 1987 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 1988 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1989 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 1990 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 1991 ret <4 x i32> %res 1992} 1993 1994define <4 x i32> @test_masked_z_i32_to_4_mem_mask0(i32* %p, <4 x i32> %mask) { 1995; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask0: 1996; CHECK: # %bb.0: 1997; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 1998; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z} 1999; CHECK-NEXT: retq 2000 %s = load i32, i32* %p 2001 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2002 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2003 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2004 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2005 ret <4 x i32> %res 2006} 2007define <4 x i32> @test_masked_i32_to_4_mem_mask1(i32* %p, <4 x i32> %default, <4 x i32> %mask) { 2008; CHECK-LABEL: test_masked_i32_to_4_mem_mask1: 2009; 
CHECK: # %bb.0: 2010; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2011; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} 2012; CHECK-NEXT: retq 2013 %s = load i32, i32* %p 2014 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2015 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2016 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2017 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 2018 ret <4 x i32> %res 2019} 2020 2021define <4 x i32> @test_masked_z_i32_to_4_mem_mask1(i32* %p, <4 x i32> %mask) { 2022; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask1: 2023; CHECK: # %bb.0: 2024; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2025; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z} 2026; CHECK-NEXT: retq 2027 %s = load i32, i32* %p 2028 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2029 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2030 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2031 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2032 ret <4 x i32> %res 2033} 2034define <4 x i32> @test_masked_i32_to_4_mem_mask2(i32* %p, <4 x i32> %default, <4 x i32> %mask) { 2035; CHECK-LABEL: test_masked_i32_to_4_mem_mask2: 2036; CHECK: # %bb.0: 2037; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2038; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} 2039; CHECK-NEXT: retq 2040 %s = load i32, i32* %p 2041 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2042 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2043 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2044 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 2045 ret <4 x i32> %res 2046} 2047 2048define <4 x i32> @test_masked_z_i32_to_4_mem_mask2(i32* %p, <4 x i32> %mask) { 2049; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask2: 2050; CHECK: # %bb.0: 2051; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2052; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z} 
2053; CHECK-NEXT: retq 2054 %s = load i32, i32* %p 2055 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2056 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2057 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2058 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2059 ret <4 x i32> %res 2060} 2061define <4 x i32> @test_masked_i32_to_4_mem_mask3(i32* %p, <4 x i32> %default, <4 x i32> %mask) { 2062; CHECK-LABEL: test_masked_i32_to_4_mem_mask3: 2063; CHECK: # %bb.0: 2064; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2065; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} 2066; CHECK-NEXT: retq 2067 %s = load i32, i32* %p 2068 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2069 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2070 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2071 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default 2072 ret <4 x i32> %res 2073} 2074 2075define <4 x i32> @test_masked_z_i32_to_4_mem_mask3(i32* %p, <4 x i32> %mask) { 2076; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask3: 2077; CHECK: # %bb.0: 2078; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2079; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z} 2080; CHECK-NEXT: retq 2081 %s = load i32, i32* %p 2082 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2083 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2084 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2085 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2086 ret <4 x i32> %res 2087} 2088define <8 x i32> @test_i32_to_8_mem(i32* %p) { 2089; CHECK-LABEL: test_i32_to_8_mem: 2090; CHECK: # %bb.0: 2091; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 2092; CHECK-NEXT: retq 2093 %s = load i32, i32* %p 2094 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2095 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0> 2096 ret <8 x i32> %res 2097} 2098define <8 x i32> @test_masked_i32_to_8_mem_mask0(i32* %p, <8 x i32> %default, <8 x i32> %mask) { 2099; CHECK-LABEL: test_masked_i32_to_8_mem_mask0: 2100; CHECK: # %bb.0: 2101; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2102; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} 2103; CHECK-NEXT: retq 2104 %s = load i32, i32* %p 2105 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2106 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2107 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2108 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 2109 ret <8 x i32> %res 2110} 2111 2112define <8 x i32> @test_masked_z_i32_to_8_mem_mask0(i32* %p, <8 x i32> %mask) { 2113; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask0: 2114; CHECK: # %bb.0: 2115; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2116; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z} 2117; CHECK-NEXT: retq 2118 %s = load i32, i32* %p 2119 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2120 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2121 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2122 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2123 ret <8 x i32> %res 2124} 2125define <8 x i32> @test_masked_i32_to_8_mem_mask1(i32* %p, <8 x i32> %default, <8 x i32> %mask) { 2126; CHECK-LABEL: test_masked_i32_to_8_mem_mask1: 2127; CHECK: # %bb.0: 2128; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2129; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} 2130; CHECK-NEXT: retq 2131 %s = load i32, i32* %p 2132 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2133 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2134 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2135 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 
2136 ret <8 x i32> %res 2137} 2138 2139define <8 x i32> @test_masked_z_i32_to_8_mem_mask1(i32* %p, <8 x i32> %mask) { 2140; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask1: 2141; CHECK: # %bb.0: 2142; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2143; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z} 2144; CHECK-NEXT: retq 2145 %s = load i32, i32* %p 2146 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2147 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2148 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2149 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2150 ret <8 x i32> %res 2151} 2152define <8 x i32> @test_masked_i32_to_8_mem_mask2(i32* %p, <8 x i32> %default, <8 x i32> %mask) { 2153; CHECK-LABEL: test_masked_i32_to_8_mem_mask2: 2154; CHECK: # %bb.0: 2155; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2156; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} 2157; CHECK-NEXT: retq 2158 %s = load i32, i32* %p 2159 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2160 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2161 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2162 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 2163 ret <8 x i32> %res 2164} 2165 2166define <8 x i32> @test_masked_z_i32_to_8_mem_mask2(i32* %p, <8 x i32> %mask) { 2167; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask2: 2168; CHECK: # %bb.0: 2169; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2170; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z} 2171; CHECK-NEXT: retq 2172 %s = load i32, i32* %p 2173 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2174 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2175 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2176 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2177 ret <8 x i32> 
%res 2178} 2179define <8 x i32> @test_masked_i32_to_8_mem_mask3(i32* %p, <8 x i32> %default, <8 x i32> %mask) { 2180; CHECK-LABEL: test_masked_i32_to_8_mem_mask3: 2181; CHECK: # %bb.0: 2182; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2183; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} 2184; CHECK-NEXT: retq 2185 %s = load i32, i32* %p 2186 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2187 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2188 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2189 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 2190 ret <8 x i32> %res 2191} 2192 2193define <8 x i32> @test_masked_z_i32_to_8_mem_mask3(i32* %p, <8 x i32> %mask) { 2194; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask3: 2195; CHECK: # %bb.0: 2196; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2197; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z} 2198; CHECK-NEXT: retq 2199 %s = load i32, i32* %p 2200 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2201 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2202 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2203 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2204 ret <8 x i32> %res 2205} 2206define <16 x i32> @test_i32_to_16_mem(i32* %p) { 2207; CHECK-LABEL: test_i32_to_16_mem: 2208; CHECK: # %bb.0: 2209; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 2210; CHECK-NEXT: retq 2211 %s = load i32, i32* %p 2212 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2213 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2214 ret <16 x i32> %res 2215} 2216define <16 x i32> @test_masked_i32_to_16_mem_mask0(i32* %p, <16 x i32> %default, <16 x i32> %mask) { 2217; CHECK-LABEL: test_masked_i32_to_16_mem_mask0: 2218; CHECK: # %bb.0: 2219; 
CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2220; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} 2221; CHECK-NEXT: retq 2222 %s = load i32, i32* %p 2223 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2224 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2225 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2226 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 2227 ret <16 x i32> %res 2228} 2229 2230define <16 x i32> @test_masked_z_i32_to_16_mem_mask0(i32* %p, <16 x i32> %mask) { 2231; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask0: 2232; CHECK: # %bb.0: 2233; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 2234; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z} 2235; CHECK-NEXT: retq 2236 %s = load i32, i32* %p 2237 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2238 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2239 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2240 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2241 ret <16 x i32> %res 2242} 2243define <16 x i32> @test_masked_i32_to_16_mem_mask1(i32* %p, <16 x i32> %default, <16 x i32> %mask) { 2244; CHECK-LABEL: test_masked_i32_to_16_mem_mask1: 2245; CHECK: # %bb.0: 2246; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2247; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} 2248; CHECK-NEXT: retq 2249 %s = load i32, i32* %p 2250 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2251 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2252 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2253 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 2254 ret <16 x i32> %res 2255} 2256 
2257define <16 x i32> @test_masked_z_i32_to_16_mem_mask1(i32* %p, <16 x i32> %mask) { 2258; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask1: 2259; CHECK: # %bb.0: 2260; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 2261; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z} 2262; CHECK-NEXT: retq 2263 %s = load i32, i32* %p 2264 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2265 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2266 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2267 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2268 ret <16 x i32> %res 2269} 2270define <16 x i32> @test_masked_i32_to_16_mem_mask2(i32* %p, <16 x i32> %default, <16 x i32> %mask) { 2271; CHECK-LABEL: test_masked_i32_to_16_mem_mask2: 2272; CHECK: # %bb.0: 2273; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2274; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} 2275; CHECK-NEXT: retq 2276 %s = load i32, i32* %p 2277 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2278 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2279 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2280 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 2281 ret <16 x i32> %res 2282} 2283 2284define <16 x i32> @test_masked_z_i32_to_16_mem_mask2(i32* %p, <16 x i32> %mask) { 2285; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask2: 2286; CHECK: # %bb.0: 2287; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 2288; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z} 2289; CHECK-NEXT: retq 2290 %s = load i32, i32* %p 2291 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2292 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0> 2293 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2294 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2295 ret <16 x i32> %res 2296} 2297define <16 x i32> @test_masked_i32_to_16_mem_mask3(i32* %p, <16 x i32> %default, <16 x i32> %mask) { 2298; CHECK-LABEL: test_masked_i32_to_16_mem_mask3: 2299; CHECK: # %bb.0: 2300; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2301; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} 2302; CHECK-NEXT: retq 2303 %s = load i32, i32* %p 2304 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2305 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2306 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2307 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 2308 ret <16 x i32> %res 2309} 2310 2311define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p, <16 x i32> %mask) { 2312; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask3: 2313; CHECK: # %bb.0: 2314; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 2315; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z} 2316; CHECK-NEXT: retq 2317 %s = load i32, i32* %p 2318 %vec = insertelement <2 x i32> undef, i32 %s, i32 0 2319 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2320 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2321 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2322 ret <16 x i32> %res 2323} 2324define <2 x i64> @test_i64_to_2_mem(i64* %p) { 2325; CHECK-LABEL: test_i64_to_2_mem: 2326; CHECK: # %bb.0: 2327; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 2328; CHECK-NEXT: retq 2329 %s = load i64, i64* %p 2330 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2331 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2332 ret <2 x i64> 
%res 2333} 2334define <2 x i64> @test_masked_i64_to_2_mem_mask0(i64* %p, <2 x i64> %default, <2 x i64> %mask) { 2335; CHECK-LABEL: test_masked_i64_to_2_mem_mask0: 2336; CHECK: # %bb.0: 2337; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 2338; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} 2339; CHECK-NEXT: retq 2340 %s = load i64, i64* %p 2341 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2342 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2343 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 2344 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default 2345 ret <2 x i64> %res 2346} 2347 2348define <2 x i64> @test_masked_z_i64_to_2_mem_mask0(i64* %p, <2 x i64> %mask) { 2349; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask0: 2350; CHECK: # %bb.0: 2351; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 2352; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} {z} 2353; CHECK-NEXT: retq 2354 %s = load i64, i64* %p 2355 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2356 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2357 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 2358 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer 2359 ret <2 x i64> %res 2360} 2361define <2 x i64> @test_masked_i64_to_2_mem_mask1(i64* %p, <2 x i64> %default, <2 x i64> %mask) { 2362; CHECK-LABEL: test_masked_i64_to_2_mem_mask1: 2363; CHECK: # %bb.0: 2364; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 2365; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} 2366; CHECK-NEXT: retq 2367 %s = load i64, i64* %p 2368 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2369 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2370 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 2371 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default 2372 ret <2 x i64> %res 2373} 2374 2375define <2 x i64> @test_masked_z_i64_to_2_mem_mask1(i64* %p, <2 x i64> %mask) { 2376; CHECK-LABEL: 
test_masked_z_i64_to_2_mem_mask1: 2377; CHECK: # %bb.0: 2378; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 2379; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} {z} 2380; CHECK-NEXT: retq 2381 %s = load i64, i64* %p 2382 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2383 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2384 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 2385 %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer 2386 ret <2 x i64> %res 2387} 2388define <4 x i64> @test_i64_to_4_mem(i64* %p) { 2389; CHECK-LABEL: test_i64_to_4_mem: 2390; CHECK: # %bb.0: 2391; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 2392; CHECK-NEXT: retq 2393 %s = load i64, i64* %p 2394 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2395 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2396 ret <4 x i64> %res 2397} 2398define <4 x i64> @test_masked_i64_to_4_mem_mask0(i64* %p, <4 x i64> %default, <4 x i64> %mask) { 2399; CHECK-LABEL: test_masked_i64_to_4_mem_mask0: 2400; CHECK: # %bb.0: 2401; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 2402; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} 2403; CHECK-NEXT: retq 2404 %s = load i64, i64* %p 2405 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2406 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 2407 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 2408 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 2409 ret <4 x i64> %res 2410} 2411 2412define <4 x i64> @test_masked_z_i64_to_4_mem_mask0(i64* %p, <4 x i64> %mask) { 2413; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask0: 2414; CHECK: # %bb.0: 2415; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 2416; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z} 2417; CHECK-NEXT: retq 2418 %s = load i64, i64* %p 2419 %vec = insertelement <2 x i64> undef, i64 %s, i32 0 2420 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, 
i32 0>
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

; Masked broadcast of an i64 loaded from %p to <4 x i64> with a pass-through
; operand: lanes whose %mask element equals zero receive the loaded scalar,
; every other lane keeps the corresponding lane of %default.
; The expected assembly builds %k1 with vptestnmq on the mask register and then
; uses one vpbroadcastq under {%k1} whose source is the memory operand (%rdi),
; i.e. no separate load instruction is expected.
; The expected-assembly lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
define <4 x i64> @test_masked_i64_to_4_mem_mask1(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  ; The scalar is placed in lane 0 of a 2-element vector and then splatted to
  ; 4 lanes by an all-zero shuffle mask.
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ; Per-lane predicate: true where the %mask element is zero.
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
  ret <4 x i64> %res
}

; Zeroing counterpart of test_masked_i64_to_4_mem_mask1: lanes whose %mask
; element is non-zero become zero (the select's false operand is
; zeroinitializer) instead of keeping a pass-through value. The expected
; vpbroadcastq carries {z} in addition to {%k1}, and the mask is tested in
; %ymm0 because there is no %default argument ahead of it.
define <4 x i64> @test_masked_z_i64_to_4_mem_mask1(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

; Pass-through masked broadcast, <4 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_i64_to_4_mem_mask1; only the
; function name differs.
define <4 x i64> @test_masked_i64_to_4_mem_mask2(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
  ret <4 x i64> %res
}

; Zeroing masked broadcast, <4 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_z_i64_to_4_mem_mask1; only the
; function name differs.
define <4 x i64> @test_masked_z_i64_to_4_mem_mask2(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

; Pass-through masked broadcast, <4 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_i64_to_4_mem_mask1; only the
; function name differs.
define <4 x i64> @test_masked_i64_to_4_mem_mask3(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
  ret <4 x i64> %res
}

; Zeroing masked broadcast, <4 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_z_i64_to_4_mem_mask1; only the
; function name differs.
define <4 x i64> @test_masked_z_i64_to_4_mem_mask3(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

; Unmasked broadcast of an i64 loaded from %p to all 8 lanes of <8 x i64>.
; The expected assembly is a single vbroadcastsd with the memory operand
; (%rdi) writing %zmm0. Note that this unmasked case expects vbroadcastsd,
; whereas the masked 8-lane tests that follow expect vpbroadcastq.
define <8 x i64> @test_i64_to_8_mem(i64* %p) {
; CHECK-LABEL: test_i64_to_8_mem:
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %res
}

; Masked broadcast of an i64 loaded from %p to <8 x i64> with a pass-through
; operand: lanes whose %mask element equals zero receive the loaded scalar,
; every other lane keeps the corresponding lane of %default.
; The expected assembly builds %k1 with vptestnmq on %zmm1 and then uses one
; vpbroadcastq under {%k1} with the memory operand (%rdi) as its source.
define <8 x i64> @test_masked_i64_to_8_mem_mask0(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  ; The scalar is placed in lane 0 of a 2-element vector and then splatted to
  ; 8 lanes by an all-zero shuffle mask.
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ; Per-lane predicate: true where the %mask element is zero.
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}

; Zeroing counterpart of test_masked_i64_to_8_mem_mask0: lanes whose %mask
; element is non-zero become zero (the select's false operand is
; zeroinitializer). The expected vpbroadcastq carries {z} in addition to
; {%k1}, and the mask is tested in %zmm0.
define <8 x i64> @test_masked_z_i64_to_8_mem_mask0(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

; Pass-through masked broadcast, <8 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_i64_to_8_mem_mask0; only the
; function name differs.
define <8 x i64> @test_masked_i64_to_8_mem_mask1(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}

; Zeroing masked broadcast, <8 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_z_i64_to_8_mem_mask0; only the
; function name differs.
define <8 x i64> @test_masked_z_i64_to_8_mem_mask1(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

; Pass-through masked broadcast, <8 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_i64_to_8_mem_mask0; only the
; function name differs.
define <8 x i64> @test_masked_i64_to_8_mem_mask2(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}

; Zeroing masked broadcast, <8 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_z_i64_to_8_mem_mask0; only the
; function name differs.
define <8 x i64> @test_masked_z_i64_to_8_mem_mask2(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

; Pass-through masked broadcast, <8 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_i64_to_8_mem_mask0; only the
; function name differs.
define <8 x i64> @test_masked_i64_to_8_mem_mask3(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}

; Zeroing masked broadcast, <8 x i64>, from memory. The IR body and the
; expected assembly are identical to test_masked_z_i64_to_8_mem_mask0; only the
; function name differs.
define <8 x i64> @test_masked_z_i64_to_8_mem_mask3(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %s = load i64, i64* %p
  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}