; Codegen regression tests for single-source byte shuffles with constant masks
; on <16 x i8>, <32 x i8> and <64 x i8> vectors (second shufflevector operand is
; undef). The RUN line invokes llc with +avx512f,+avx512vl,+avx512bw, and every
; CHECK block below expects a single vpshufb on xmm/ymm/zmm respectively.
;
; Function-name conventions used in this file:
;   test_<N>xi8_perm_*     - bare shufflevector; an unmasked vpshufb is expected.
;   test_masked_*          - the shuffle result is selected against %vec2 under
;                            `icmp eq %mask, zeroinitializer`; the CHECK lines
;                            expect vptestnmb into %k1 and vpshufb {%k1}.
;   test_masked_z_*        - same compare, but selected against zeroinitializer;
;                            the CHECK lines expect vpshufb {%k1} {z}.
;   *_mem_*                - the source vector is loaded from %vp; the CHECK
;                            lines expect a separate vmovdqa load ahead of the
;                            vpshufb.
;
; Every mask visible here keeps each index inside its own 16-byte group
; (0-15, 16-31, 32-47, 48-63).
; Per the NOTE line, the CHECK assertions were generated by
; utils/update_llc_test_checks.py.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s 3 4define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { 5; CHECK-LABEL: test_16xi8_perm_mask0: 6; CHECK: # %bb.0: 7; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] 8; CHECK-NEXT: retq 9 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 10 ret <16 x i8> %res 11} 12define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 13; CHECK-LABEL: test_masked_16xi8_perm_mask0: 14; CHECK: # %bb.0: 15; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 16; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] 17; CHECK-NEXT: vmovdqa %xmm1, %xmm0 18; CHECK-NEXT: retq 19 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 20 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 21 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 22 ret <16 x i8> %res 23} 24 25define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) { 26; CHECK-LABEL: test_masked_z_16xi8_perm_mask0: 27; CHECK: # %bb.0: 28; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 29; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] 30; CHECK-NEXT: retq 31 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 32 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 33 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 34 ret <16 x i8> %res 35} 36define <16 x i8> 
@test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 37; CHECK-LABEL: test_masked_16xi8_perm_mask1: 38; CHECK: # %bb.0: 39; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 40; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] 41; CHECK-NEXT: vmovdqa %xmm1, %xmm0 42; CHECK-NEXT: retq 43 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 44 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 45 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 46 ret <16 x i8> %res 47} 48 49define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { 50; CHECK-LABEL: test_masked_z_16xi8_perm_mask1: 51; CHECK: # %bb.0: 52; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 53; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] 54; CHECK-NEXT: retq 55 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 56 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 57 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 58 ret <16 x i8> %res 59} 60define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 61; CHECK-LABEL: test_masked_16xi8_perm_mask2: 62; CHECK: # %bb.0: 63; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 64; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] 65; CHECK-NEXT: vmovdqa %xmm1, %xmm0 66; CHECK-NEXT: retq 67 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 68 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 69 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> 
%vec2 70 ret <16 x i8> %res 71} 72 73define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { 74; CHECK-LABEL: test_masked_z_16xi8_perm_mask2: 75; CHECK: # %bb.0: 76; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 77; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] 78; CHECK-NEXT: retq 79 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 80 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 81 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 82 ret <16 x i8> %res 83} 84define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { 85; CHECK-LABEL: test_16xi8_perm_mask3: 86; CHECK: # %bb.0: 87; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] 88; CHECK-NEXT: retq 89 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 90 ret <16 x i8> %res 91} 92define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 93; CHECK-LABEL: test_masked_16xi8_perm_mask3: 94; CHECK: # %bb.0: 95; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 96; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] 97; CHECK-NEXT: vmovdqa %xmm1, %xmm0 98; CHECK-NEXT: retq 99 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 100 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 101 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 102 ret <16 x i8> %res 103} 104 105define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { 106; CHECK-LABEL: test_masked_z_16xi8_perm_mask3: 107; CHECK: # %bb.0: 108; CHECK-NEXT: 
vptestnmb %xmm1, %xmm1, %k1 109; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] 110; CHECK-NEXT: retq 111 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 112 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 113 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 114 ret <16 x i8> %res 115} 116define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) { 117; CHECK-LABEL: test_16xi8_perm_mem_mask0: 118; CHECK: # %bb.0: 119; CHECK-NEXT: vmovdqa (%rdi), %xmm0 120; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] 121; CHECK-NEXT: retq 122 %vec = load <16 x i8>, <16 x i8>* %vp 123 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 124 ret <16 x i8> %res 125} 126define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 127; CHECK-LABEL: test_masked_16xi8_perm_mem_mask0: 128; CHECK: # %bb.0: 129; CHECK-NEXT: vmovdqa (%rdi), %xmm2 130; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 131; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] 132; CHECK-NEXT: retq 133 %vec = load <16 x i8>, <16 x i8>* %vp 134 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 135 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 136 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 137 ret <16 x i8> %res 138} 139 140define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) { 141; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask0: 142; CHECK: # %bb.0: 143; CHECK-NEXT: vmovdqa (%rdi), 
%xmm1 144; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 145; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] 146; CHECK-NEXT: retq 147 %vec = load <16 x i8>, <16 x i8>* %vp 148 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 149 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 150 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 151 ret <16 x i8> %res 152} 153 154define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 155; CHECK-LABEL: test_masked_16xi8_perm_mem_mask1: 156; CHECK: # %bb.0: 157; CHECK-NEXT: vmovdqa (%rdi), %xmm2 158; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 159; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] 160; CHECK-NEXT: retq 161 %vec = load <16 x i8>, <16 x i8>* %vp 162 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 163 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 164 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 165 ret <16 x i8> %res 166} 167 168define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) { 169; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask1: 170; CHECK: # %bb.0: 171; CHECK-NEXT: vmovdqa (%rdi), %xmm1 172; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 173; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] 174; CHECK-NEXT: retq 175 %vec = load <16 x i8>, <16 x i8>* %vp 176 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 177 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 178 %res = 
select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 179 ret <16 x i8> %res 180} 181 182define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 183; CHECK-LABEL: test_masked_16xi8_perm_mem_mask2: 184; CHECK: # %bb.0: 185; CHECK-NEXT: vmovdqa (%rdi), %xmm2 186; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 187; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] 188; CHECK-NEXT: retq 189 %vec = load <16 x i8>, <16 x i8>* %vp 190 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 191 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 192 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 193 ret <16 x i8> %res 194} 195 196define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) { 197; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask2: 198; CHECK: # %bb.0: 199; CHECK-NEXT: vmovdqa (%rdi), %xmm1 200; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 201; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] 202; CHECK-NEXT: retq 203 %vec = load <16 x i8>, <16 x i8>* %vp 204 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 205 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 206 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 207 ret <16 x i8> %res 208} 209 210define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) { 211; CHECK-LABEL: test_16xi8_perm_mem_mask3: 212; CHECK: # %bb.0: 213; CHECK-NEXT: vmovdqa (%rdi), %xmm0 214; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] 215; CHECK-NEXT: retq 216 %vec = load <16 x i8>, <16 x i8>* %vp 217 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x 
i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 218 ret <16 x i8> %res 219} 220define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 221; CHECK-LABEL: test_masked_16xi8_perm_mem_mask3: 222; CHECK: # %bb.0: 223; CHECK-NEXT: vmovdqa (%rdi), %xmm2 224; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 225; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] 226; CHECK-NEXT: retq 227 %vec = load <16 x i8>, <16 x i8>* %vp 228 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 229 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 230 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 231 ret <16 x i8> %res 232} 233 234define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) { 235; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask3: 236; CHECK: # %bb.0: 237; CHECK-NEXT: vmovdqa (%rdi), %xmm1 238; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 239; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] 240; CHECK-NEXT: retq 241 %vec = load <16 x i8>, <16 x i8>* %vp 242 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 243 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 244 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 245 ret <16 x i8> %res 246} 247 248define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { 249; CHECK-LABEL: test_32xi8_perm_mask0: 250; CHECK: # %bb.0: 251; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] 252; CHECK-NEXT: retq 253 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, 
<32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 254 ret <32 x i8> %res 255} 256define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 257; CHECK-LABEL: test_masked_32xi8_perm_mask0: 258; CHECK: # %bb.0: 259; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 260; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] 261; CHECK-NEXT: vmovdqa %ymm1, %ymm0 262; CHECK-NEXT: retq 263 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 264 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 265 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 266 ret <32 x i8> %res 267} 268 269define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { 270; CHECK-LABEL: test_masked_z_32xi8_perm_mask0: 271; CHECK: # %bb.0: 272; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 273; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] 274; CHECK-NEXT: retq 275 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 276 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 277 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 278 
ret <32 x i8> %res 279} 280define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 281; CHECK-LABEL: test_masked_32xi8_perm_mask1: 282; CHECK: # %bb.0: 283; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 284; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] 285; CHECK-NEXT: vmovdqa %ymm1, %ymm0 286; CHECK-NEXT: retq 287 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 288 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 289 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 290 ret <32 x i8> %res 291} 292 293define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { 294; CHECK-LABEL: test_masked_z_32xi8_perm_mask1: 295; CHECK: # %bb.0: 296; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 297; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] 298; CHECK-NEXT: retq 299 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 300 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 301 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 302 ret <32 x i8> %res 303} 304define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 305; CHECK-LABEL: test_masked_32xi8_perm_mask2: 306; CHECK: # %bb.0: 307; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 308; CHECK-NEXT: vpshufb {{.*#+}} ymm1 
{%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] 309; CHECK-NEXT: vmovdqa %ymm1, %ymm0 310; CHECK-NEXT: retq 311 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 312 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 313 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 314 ret <32 x i8> %res 315} 316 317define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { 318; CHECK-LABEL: test_masked_z_32xi8_perm_mask2: 319; CHECK: # %bb.0: 320; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 321; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] 322; CHECK-NEXT: retq 323 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 324 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 325 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 326 ret <32 x i8> %res 327} 328define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { 329; CHECK-LABEL: test_32xi8_perm_mask3: 330; CHECK: # %bb.0: 331; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] 332; CHECK-NEXT: retq 333 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 
26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 334 ret <32 x i8> %res 335} 336define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 337; CHECK-LABEL: test_masked_32xi8_perm_mask3: 338; CHECK: # %bb.0: 339; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 340; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] 341; CHECK-NEXT: vmovdqa %ymm1, %ymm0 342; CHECK-NEXT: retq 343 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 344 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 345 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 346 ret <32 x i8> %res 347} 348 349define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { 350; CHECK-LABEL: test_masked_z_32xi8_perm_mask3: 351; CHECK: # %bb.0: 352; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 353; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] 354; CHECK-NEXT: retq 355 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 356 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 357 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 358 ret <32 x i8> %res 359} 360define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { 361; CHECK-LABEL: test_32xi8_perm_mem_mask0: 362; CHECK: # %bb.0: 363; CHECK-NEXT: vmovdqa (%rdi), %ymm0 364; 
CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 365; CHECK-NEXT: retq 366 %vec = load <32 x i8>, <32 x i8>* %vp 367 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 368 ret <32 x i8> %res 369} 370define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 371; CHECK-LABEL: test_masked_32xi8_perm_mem_mask0: 372; CHECK: # %bb.0: 373; CHECK-NEXT: vmovdqa (%rdi), %ymm2 374; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 375; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 376; CHECK-NEXT: retq 377 %vec = load <32 x i8>, <32 x i8>* %vp 378 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 379 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 380 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 381 ret <32 x i8> %res 382} 383 384define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %mask) { 385; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask0: 386; CHECK: # %bb.0: 387; CHECK-NEXT: vmovdqa (%rdi), %ymm1 388; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 389; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 390; CHECK-NEXT: retq 391 %vec = load <32 x i8>, <32 x i8>* %vp 392 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 
15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 393 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 394 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 395 ret <32 x i8> %res 396} 397 398define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 399; CHECK-LABEL: test_masked_32xi8_perm_mem_mask1: 400; CHECK: # %bb.0: 401; CHECK-NEXT: vmovdqa (%rdi), %ymm2 402; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 403; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] 404; CHECK-NEXT: retq 405 %vec = load <32 x i8>, <32 x i8>* %vp 406 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 407 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 408 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 409 ret <32 x i8> %res 410} 411 412define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) { 413; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask1: 414; CHECK: # %bb.0: 415; CHECK-NEXT: vmovdqa (%rdi), %ymm1 416; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 417; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] 418; CHECK-NEXT: retq 419 %vec = load <32 x i8>, <32 x i8>* %vp 420 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 
29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 421 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 422 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 423 ret <32 x i8> %res 424} 425 426define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 427; CHECK-LABEL: test_masked_32xi8_perm_mem_mask2: 428; CHECK: # %bb.0: 429; CHECK-NEXT: vmovdqa (%rdi), %ymm2 430; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 431; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] 432; CHECK-NEXT: retq 433 %vec = load <32 x i8>, <32 x i8>* %vp 434 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 435 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 436 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 437 ret <32 x i8> %res 438} 439 440define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) { 441; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask2: 442; CHECK: # %bb.0: 443; CHECK-NEXT: vmovdqa (%rdi), %ymm1 444; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 445; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] 446; CHECK-NEXT: retq 447 %vec = load <32 x i8>, <32 x i8>* %vp 448 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 449 %cmp = icmp eq <32 x i8> %mask, 
zeroinitializer 450 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 451 ret <32 x i8> %res 452} 453 454define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { 455; CHECK-LABEL: test_32xi8_perm_mem_mask3: 456; CHECK: # %bb.0: 457; CHECK-NEXT: vmovdqa (%rdi), %ymm0 458; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] 459; CHECK-NEXT: retq 460 %vec = load <32 x i8>, <32 x i8>* %vp 461 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 462 ret <32 x i8> %res 463} 464define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 465; CHECK-LABEL: test_masked_32xi8_perm_mem_mask3: 466; CHECK: # %bb.0: 467; CHECK-NEXT: vmovdqa (%rdi), %ymm2 468; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 469; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] 470; CHECK-NEXT: retq 471 %vec = load <32 x i8>, <32 x i8>* %vp 472 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 473 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 474 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 475 ret <32 x i8> %res 476} 477 478define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) { 479; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask3: 480; CHECK: # %bb.0: 481; CHECK-NEXT: vmovdqa (%rdi), %ymm1 482; CHECK-NEXT: vptestnmb 
%ymm0, %ymm0, %k1 483; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] 484; CHECK-NEXT: retq 485 %vec = load <32 x i8>, <32 x i8>* %vp 486 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 487 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 488 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 489 ret <32 x i8> %res 490} 491 492define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { 493; CHECK-LABEL: test_64xi8_perm_mask0: 494; CHECK: # %bb.0: 495; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] 496; CHECK-NEXT: retq 497 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 498 ret <64 x i8> %res 499} 500define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 501; CHECK-LABEL: test_masked_64xi8_perm_mask0: 502; CHECK: # %bb.0: 503; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 504; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = 
zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] 505; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 506; CHECK-NEXT: retq 507 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 508 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 509 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 510 ret <64 x i8> %res 511} 512 513define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { 514; CHECK-LABEL: test_masked_z_64xi8_perm_mask0: 515; CHECK: # %bb.0: 516; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 517; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] 518; CHECK-NEXT: retq 519 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, 
i32 62> 520 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 521 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 522 ret <64 x i8> %res 523} 524define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 525; CHECK-LABEL: test_masked_64xi8_perm_mask1: 526; CHECK: # %bb.0: 527; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 528; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] 529; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 530; CHECK-NEXT: retq 531 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 532 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 533 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 534 ret <64 x i8> %res 535} 536 537define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { 538; CHECK-LABEL: test_masked_z_64xi8_perm_mask1: 539; CHECK: # %bb.0: 540; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 541; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] 542; CHECK-NEXT: retq 543 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, 
i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 544 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 545 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 546 ret <64 x i8> %res 547} 548define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 549; CHECK-LABEL: test_masked_64xi8_perm_mask2: 550; CHECK: # %bb.0: 551; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 552; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] 553; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 554; CHECK-NEXT: retq 555 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 556 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 557 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 558 ret <64 x i8> %res 559} 560 561define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { 562; CHECK-LABEL: test_masked_z_64xi8_perm_mask2: 563; CHECK: # %bb.0: 564; 
CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 565; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] 566; CHECK-NEXT: retq 567 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 568 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 569 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 570 ret <64 x i8> %res 571} 572define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { 573; CHECK-LABEL: test_64xi8_perm_mask3: 574; CHECK: # %bb.0: 575; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] 576; CHECK-NEXT: retq 577 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 578 ret <64 x i8> %res 579} 
; ---------------------------------------------------------------------------
; 512-bit byte permutes, register source, index pattern 3.
; Each row of 16 shuffle indices selects only from the matching 16-byte group
; of the source; the expected assembly is a single vpshufb.
; ---------------------------------------------------------------------------

; Merge-masked form: result bytes whose %mask byte equals zero take the
; permuted value, all other bytes are taken from %vec2.
define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <
      i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14,
      i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22,
      i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38,
      i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> %vec2
  ret <64 x i8> %out
}

; Zero-masked form: result bytes whose %mask byte equals zero take the
; permuted value, all other bytes are zero.
define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
; CHECK-NEXT:    retq
  %perm = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <
      i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14,
      i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22,
      i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38,
      i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}

; ---------------------------------------------------------------------------
; 512-bit byte permutes where the source vector is loaded through %vp.
; As above, every row of 16 indices stays inside its own 16-byte group.
; ---------------------------------------------------------------------------

; Unmasked permute of a loaded vector, index pattern 0.
define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) {
; CHECK-LABEL: test_64xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %out = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6,
      i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22,
      i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40,
      i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
  ret <64 x i8> %out
}

; Merge-masked permute of a loaded vector, index pattern 0: bytes whose %mask
; byte equals zero take the permuted value, the rest come from %vec2.
define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6,
      i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22,
      i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40,
      i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> %vec2
  ret <64 x i8> %out
}

; Zero-masked permute of a loaded vector, index pattern 0: bytes whose %mask
; byte equals zero take the permuted value, the rest are zero.
define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6,
      i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22,
      i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40,
      i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}

; Merge-masked permute of a loaded vector, index pattern 1.
define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8,
      i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28,
      i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39,
      i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> %vec2
  ret <64 x i8> %out
}

; Zero-masked permute of a loaded vector, index pattern 1.
define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8,
      i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28,
      i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39,
      i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}

; Merge-masked permute of a loaded vector, index pattern 2.
define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10,
      i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16,
      i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41,
      i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> %vec2
  ret <64 x i8> %out
}

; Zero-masked permute of a loaded vector, index pattern 2.
define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10,
      i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16,
      i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41,
      i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}

; Unmasked permute of a loaded vector, index pattern 3.
define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) {
; CHECK-LABEL: test_64xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %out = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10,
      i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30,
      i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46,
      i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
  ret <64 x i8> %out
}

; Merge-masked permute of a loaded vector, index pattern 3.
define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10,
      i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30,
      i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46,
      i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> %vec2
  ret <64 x i8> %out
}

; Zero-masked permute of a loaded vector, index pattern 3.
define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
; CHECK-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %vp
  %perm = shufflevector <64 x i8> %src, <64 x i8> undef, <64 x i32> <
      i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10,
      i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30,
      i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46,
      i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
  %is.zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is.zero, <64 x i8> %perm, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}

; ---------------------------------------------------------------------------
; 128-bit word permutes that leave elements 0-3 in place and rearrange only
; elements 4-7; the expected assembly is vpshufhw.
; ---------------------------------------------------------------------------

; Unmasked high-half word permute, index pattern 0.
define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) {
; CHECK-LABEL: test_8xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6]
; CHECK-NEXT:    retq
  %out = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <
      i32 0, i32 1, i32 2, i32 3,
      i32 6, i32 5, i32 7, i32 6>
  ret <8 x i16> %out
}

; Merge-masked high-half word permute, index pattern 0: words whose %mask
; word equals zero take the permuted value, the rest come from %vec2.
define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_8xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <
      i32 0, i32 1, i32 2, i32 3,
      i32 6, i32 5, i32 7, i32 6>
  %is.zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i16> %perm, <8 x i16> %vec2
  ret <8 x i16> %out
}

757define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { 758; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask0: 759; CHECK: # %bb.0: 760; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 761; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] 762; CHECK-NEXT: retq 763 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 764 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 765 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 766 ret <8 x i16> %res 767} 768define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 769; CHECK-LABEL: test_masked_8xi16_perm_low_mask1: 770; CHECK: # %bb.0: 771; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 772; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] 773; CHECK-NEXT: vmovdqa %xmm1, %xmm0 774; CHECK-NEXT: retq 775 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 776 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 777 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 778 ret <8 x i16> %res 779} 780 781define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { 782; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask1: 783; CHECK: # %bb.0: 784; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 785; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] 786; CHECK-NEXT: retq 787 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 788 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 789 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 790 ret <8 x i16> %res 791} 792define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 793; CHECK-LABEL: test_masked_8xi16_perm_high_mask2: 794; CHECK: # %bb.0: 795; 
CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 796; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] 797; CHECK-NEXT: vmovdqa %xmm1, %xmm0 798; CHECK-NEXT: retq 799 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 800 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 801 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 802 ret <8 x i16> %res 803} 804 805define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { 806; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask2: 807; CHECK: # %bb.0: 808; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 809; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] 810; CHECK-NEXT: retq 811 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 812 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 813 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 814 ret <8 x i16> %res 815} 816define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { 817; CHECK-LABEL: test_8xi16_perm_low_mask3: 818; CHECK: # %bb.0: 819; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] 820; CHECK-NEXT: retq 821 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 822 ret <8 x i16> %res 823} 824define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 825; CHECK-LABEL: test_masked_8xi16_perm_low_mask3: 826; CHECK: # %bb.0: 827; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 828; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] 829; CHECK-NEXT: vmovdqa %xmm1, %xmm0 830; CHECK-NEXT: retq 831 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 832 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 833 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> 
%vec2 834 ret <8 x i16> %res 835} 836 837define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { 838; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask3: 839; CHECK: # %bb.0: 840; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 841; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] 842; CHECK-NEXT: retq 843 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 844 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 845 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 846 ret <8 x i16> %res 847} 848define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 849; CHECK-LABEL: test_masked_8xi16_perm_high_mask4: 850; CHECK: # %bb.0: 851; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 852; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] 853; CHECK-NEXT: vmovdqa %xmm1, %xmm0 854; CHECK-NEXT: retq 855 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 856 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 857 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 858 ret <8 x i16> %res 859} 860 861define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { 862; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask4: 863; CHECK: # %bb.0: 864; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 865; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] 866; CHECK-NEXT: retq 867 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 868 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 869 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 870 ret <8 x i16> %res 871} 872define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 873; CHECK-LABEL: 
test_masked_8xi16_perm_low_mask5: 874; CHECK: # %bb.0: 875; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 876; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] 877; CHECK-NEXT: vmovdqa %xmm1, %xmm0 878; CHECK-NEXT: retq 879 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 880 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 881 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 882 ret <8 x i16> %res 883} 884 885define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { 886; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask5: 887; CHECK: # %bb.0: 888; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 889; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] 890; CHECK-NEXT: retq 891 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 892 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 893 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 894 ret <8 x i16> %res 895} 896define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { 897; CHECK-LABEL: test_8xi16_perm_high_mask6: 898; CHECK: # %bb.0: 899; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] 900; CHECK-NEXT: retq 901 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 902 ret <8 x i16> %res 903} 904define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 905; CHECK-LABEL: test_masked_8xi16_perm_high_mask6: 906; CHECK: # %bb.0: 907; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 908; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] 909; CHECK-NEXT: vmovdqa %xmm1, %xmm0 910; CHECK-NEXT: retq 911 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 912 %cmp = icmp eq <8 x i16> %mask, 
zeroinitializer 913 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 914 ret <8 x i16> %res 915} 916 917define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { 918; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask6: 919; CHECK: # %bb.0: 920; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 921; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] 922; CHECK-NEXT: retq 923 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 924 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 925 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 926 ret <8 x i16> %res 927} 928define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 929; CHECK-LABEL: test_masked_8xi16_perm_low_mask7: 930; CHECK: # %bb.0: 931; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 932; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] 933; CHECK-NEXT: vmovdqa %xmm1, %xmm0 934; CHECK-NEXT: retq 935 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 936 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 937 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 938 ret <8 x i16> %res 939} 940 941define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { 942; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask7: 943; CHECK: # %bb.0: 944; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 945; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] 946; CHECK-NEXT: retq 947 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 948 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 949 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 950 ret <8 x i16> %res 951} 952define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { 953; 
CHECK-LABEL: test_8xi16_perm_high_mem_mask0: 954; CHECK: # %bb.0: 955; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] 956; CHECK-NEXT: retq 957 %vec = load <8 x i16>, <8 x i16>* %vp 958 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 959 ret <8 x i16> %res 960} 961define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 962; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask0: 963; CHECK: # %bb.0: 964; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 965; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] 966; CHECK-NEXT: retq 967 %vec = load <8 x i16>, <8 x i16>* %vp 968 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 969 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 970 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 971 ret <8 x i16> %res 972} 973 974define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { 975; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: 976; CHECK: # %bb.0: 977; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 978; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] 979; CHECK-NEXT: retq 980 %vec = load <8 x i16>, <8 x i16>* %vp 981 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 982 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 983 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 984 ret <8 x i16> %res 985} 986 987define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 988; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask1: 989; CHECK: # %bb.0: 990; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 991; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] 992; CHECK-NEXT: retq 993 %vec = load <8 x i16>, <8 x i16>* %vp 
994 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 995 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 996 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 997 ret <8 x i16> %res 998} 999 1000define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { 1001; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: 1002; CHECK: # %bb.0: 1003; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1004; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] 1005; CHECK-NEXT: retq 1006 %vec = load <8 x i16>, <8 x i16>* %vp 1007 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1008 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1009 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1010 ret <8 x i16> %res 1011} 1012 1013define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1014; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask2: 1015; CHECK: # %bb.0: 1016; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1017; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] 1018; CHECK-NEXT: retq 1019 %vec = load <8 x i16>, <8 x i16>* %vp 1020 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 1021 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1022 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1023 ret <8 x i16> %res 1024} 1025 1026define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { 1027; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: 1028; CHECK: # %bb.0: 1029; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1030; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] 1031; CHECK-NEXT: retq 1032 %vec = load <8 x i16>, <8 x i16>* %vp 1033 %shuf = shufflevector <8 x i16> %vec, <8 x 
i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 1034 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1035 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1036 ret <8 x i16> %res 1037} 1038 1039define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { 1040; CHECK-LABEL: test_8xi16_perm_low_mem_mask3: 1041; CHECK: # %bb.0: 1042; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] 1043; CHECK-NEXT: retq 1044 %vec = load <8 x i16>, <8 x i16>* %vp 1045 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 1046 ret <8 x i16> %res 1047} 1048define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1049; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask3: 1050; CHECK: # %bb.0: 1051; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1052; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] 1053; CHECK-NEXT: retq 1054 %vec = load <8 x i16>, <8 x i16>* %vp 1055 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 1056 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1057 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1058 ret <8 x i16> %res 1059} 1060 1061define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { 1062; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: 1063; CHECK: # %bb.0: 1064; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1065; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] 1066; CHECK-NEXT: retq 1067 %vec = load <8 x i16>, <8 x i16>* %vp 1068 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 1069 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1070 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1071 ret <8 x i16> %res 1072} 1073 1074define <8 x 
i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1075; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask4: 1076; CHECK: # %bb.0: 1077; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1078; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] 1079; CHECK-NEXT: retq 1080 %vec = load <8 x i16>, <8 x i16>* %vp 1081 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 1082 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1083 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1084 ret <8 x i16> %res 1085} 1086 1087define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { 1088; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: 1089; CHECK: # %bb.0: 1090; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1091; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] 1092; CHECK-NEXT: retq 1093 %vec = load <8 x i16>, <8 x i16>* %vp 1094 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 1095 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1096 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1097 ret <8 x i16> %res 1098} 1099 1100define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1101; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask5: 1102; CHECK: # %bb.0: 1103; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1104; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] 1105; CHECK-NEXT: retq 1106 %vec = load <8 x i16>, <8 x i16>* %vp 1107 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1108 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1109 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1110 ret <8 x i16> %res 1111} 1112 1113define <8 x i16> 
@test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { 1114; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: 1115; CHECK: # %bb.0: 1116; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1117; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] 1118; CHECK-NEXT: retq 1119 %vec = load <8 x i16>, <8 x i16>* %vp 1120 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1121 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1122 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1123 ret <8 x i16> %res 1124} 1125 1126define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { 1127; CHECK-LABEL: test_8xi16_perm_high_mem_mask6: 1128; CHECK: # %bb.0: 1129; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] 1130; CHECK-NEXT: retq 1131 %vec = load <8 x i16>, <8 x i16>* %vp 1132 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 1133 ret <8 x i16> %res 1134} 1135define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1136; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask6: 1137; CHECK: # %bb.0: 1138; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1139; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] 1140; CHECK-NEXT: retq 1141 %vec = load <8 x i16>, <8 x i16>* %vp 1142 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 1143 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1144 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1145 ret <8 x i16> %res 1146} 1147 1148define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { 1149; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: 1150; CHECK: # %bb.0: 1151; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1152; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = 
mem[0,1,2,3,7,4,4,4] 1153; CHECK-NEXT: retq 1154 %vec = load <8 x i16>, <8 x i16>* %vp 1155 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 1156 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1157 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1158 ret <8 x i16> %res 1159} 1160 1161define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1162; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask7: 1163; CHECK: # %bb.0: 1164; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1165; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] 1166; CHECK-NEXT: retq 1167 %vec = load <8 x i16>, <8 x i16>* %vp 1168 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 1169 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1170 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1171 ret <8 x i16> %res 1172} 1173 1174define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { 1175; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: 1176; CHECK: # %bb.0: 1177; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1178; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] 1179; CHECK-NEXT: retq 1180 %vec = load <8 x i16>, <8 x i16>* %vp 1181 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 1182 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1183 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1184 ret <8 x i16> %res 1185} 1186 1187define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { 1188; CHECK-LABEL: test_16xi16_perm_high_mask0: 1189; CHECK: # %bb.0: 1190; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] 1191; CHECK-NEXT: retq 1192 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, 
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 1193 ret <16 x i16> %res 1194} 1195define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1196; CHECK-LABEL: test_masked_16xi16_perm_high_mask0: 1197; CHECK: # %bb.0: 1198; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1199; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] 1200; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1201; CHECK-NEXT: retq 1202 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 1203 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1204 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1205 ret <16 x i16> %res 1206} 1207 1208define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { 1209; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask0: 1210; CHECK: # %bb.0: 1211; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1212; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] 1213; CHECK-NEXT: retq 1214 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 1215 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1216 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1217 ret <16 x i16> %res 1218} 1219define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1220; CHECK-LABEL: test_masked_16xi16_perm_low_mask1: 1221; CHECK: # %bb.0: 1222; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1223; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] 1224; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1225; CHECK-NEXT: retq 1226 
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1227 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1228 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1229 ret <16 x i16> %res 1230} 1231 1232define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { 1233; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask1: 1234; CHECK: # %bb.0: 1235; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1236; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] 1237; CHECK-NEXT: retq 1238 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1239 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1240 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1241 ret <16 x i16> %res 1242} 1243define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1244; CHECK-LABEL: test_masked_16xi16_perm_high_mask2: 1245; CHECK: # %bb.0: 1246; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1247; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] 1248; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1249; CHECK-NEXT: retq 1250 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 1251 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1252 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1253 ret <16 x i16> %res 1254} 1255 1256define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { 1257; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask2: 1258; CHECK: # %bb.0: 1259; CHECK-NEXT: vptestnmw 
%ymm1, %ymm1, %k1 1260; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] 1261; CHECK-NEXT: retq 1262 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 1263 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1264 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1265 ret <16 x i16> %res 1266} 1267define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { 1268; CHECK-LABEL: test_16xi16_perm_low_mask3: 1269; CHECK: # %bb.0: 1270; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] 1271; CHECK-NEXT: retq 1272 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1273 ret <16 x i16> %res 1274} 1275define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1276; CHECK-LABEL: test_masked_16xi16_perm_low_mask3: 1277; CHECK: # %bb.0: 1278; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1279; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] 1280; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1281; CHECK-NEXT: retq 1282 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1283 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1284 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1285 ret <16 x i16> %res 1286} 1287 1288define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { 1289; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask3: 1290; CHECK: # %bb.0: 1291; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1292; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = 
ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] 1293; CHECK-NEXT: retq 1294 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1295 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1296 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1297 ret <16 x i16> %res 1298} 1299define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1300; CHECK-LABEL: test_masked_16xi16_perm_high_mask4: 1301; CHECK: # %bb.0: 1302; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1303; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] 1304; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1305; CHECK-NEXT: retq 1306 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 1307 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1308 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1309 ret <16 x i16> %res 1310} 1311 1312define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { 1313; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask4: 1314; CHECK: # %bb.0: 1315; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1316; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] 1317; CHECK-NEXT: retq 1318 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 1319 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1320 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1321 ret <16 x i16> %res 1322} 1323define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1324; 
CHECK-LABEL: test_masked_16xi16_perm_low_mask5: 1325; CHECK: # %bb.0: 1326; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1327; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] 1328; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1329; CHECK-NEXT: retq 1330 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1331 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1332 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1333 ret <16 x i16> %res 1334} 1335 1336define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { 1337; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask5: 1338; CHECK: # %bb.0: 1339; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1340; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] 1341; CHECK-NEXT: retq 1342 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1343 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1344 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1345 ret <16 x i16> %res 1346} 1347define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { 1348; CHECK-LABEL: test_16xi16_perm_high_mask6: 1349; CHECK: # %bb.0: 1350; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] 1351; CHECK-NEXT: retq 1352 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 1353 ret <16 x i16> %res 1354} 1355define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1356; CHECK-LABEL: test_masked_16xi16_perm_high_mask6: 1357; CHECK: # %bb.0: 1358; 
CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1359; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] 1360; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1361; CHECK-NEXT: retq 1362 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 1363 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1364 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1365 ret <16 x i16> %res 1366} 1367 1368define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { 1369; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask6: 1370; CHECK: # %bb.0: 1371; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1372; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] 1373; CHECK-NEXT: retq 1374 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 1375 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1376 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1377 ret <16 x i16> %res 1378} 1379define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1380; CHECK-LABEL: test_masked_16xi16_perm_low_mask7: 1381; CHECK: # %bb.0: 1382; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1383; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] 1384; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1385; CHECK-NEXT: retq 1386 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 1387 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1388 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1389 ret <16 x i16> %res 1390} 1391 1392define 
<16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { 1393; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask7: 1394; CHECK: # %bb.0: 1395; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1396; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] 1397; CHECK-NEXT: retq 1398 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 1399 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1400 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1401 ret <16 x i16> %res 1402} 1403define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { 1404; CHECK-LABEL: test_16xi16_perm_high_mem_mask0: 1405; CHECK: # %bb.0: 1406; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] 1407; CHECK-NEXT: retq 1408 %vec = load <16 x i16>, <16 x i16>* %vp 1409 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 1410 ret <16 x i16> %res 1411} 1412define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1413; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask0: 1414; CHECK: # %bb.0: 1415; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1416; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] 1417; CHECK-NEXT: retq 1418 %vec = load <16 x i16>, <16 x i16>* %vp 1419 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 1420 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1421 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1422 ret <16 x i16> %res 1423} 1424 1425define <16 x i16> 
@test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { 1426; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: 1427; CHECK: # %bb.0: 1428; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1429; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] 1430; CHECK-NEXT: retq 1431 %vec = load <16 x i16>, <16 x i16>* %vp 1432 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 1433 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1434 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1435 ret <16 x i16> %res 1436} 1437 1438define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1439; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask1: 1440; CHECK: # %bb.0: 1441; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1442; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] 1443; CHECK-NEXT: retq 1444 %vec = load <16 x i16>, <16 x i16>* %vp 1445 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1446 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1447 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1448 ret <16 x i16> %res 1449} 1450 1451define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { 1452; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: 1453; CHECK: # %bb.0: 1454; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1455; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] 1456; CHECK-NEXT: retq 1457 %vec = load <16 x i16>, <16 x i16>* %vp 1458 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 
6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1459 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1460 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1461 ret <16 x i16> %res 1462} 1463 1464define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1465; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask2: 1466; CHECK: # %bb.0: 1467; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1468; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] 1469; CHECK-NEXT: retq 1470 %vec = load <16 x i16>, <16 x i16>* %vp 1471 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 1472 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1473 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1474 ret <16 x i16> %res 1475} 1476 1477define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { 1478; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: 1479; CHECK: # %bb.0: 1480; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1481; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] 1482; CHECK-NEXT: retq 1483 %vec = load <16 x i16>, <16 x i16>* %vp 1484 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 1485 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1486 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1487 ret <16 x i16> %res 1488} 1489 1490define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { 1491; CHECK-LABEL: test_16xi16_perm_low_mem_mask3: 1492; CHECK: # %bb.0: 1493; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] 1494; 
CHECK-NEXT: retq 1495 %vec = load <16 x i16>, <16 x i16>* %vp 1496 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1497 ret <16 x i16> %res 1498} 1499define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1500; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask3: 1501; CHECK: # %bb.0: 1502; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1503; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] 1504; CHECK-NEXT: retq 1505 %vec = load <16 x i16>, <16 x i16>* %vp 1506 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1507 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1508 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1509 ret <16 x i16> %res 1510} 1511 1512define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { 1513; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: 1514; CHECK: # %bb.0: 1515; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1516; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] 1517; CHECK-NEXT: retq 1518 %vec = load <16 x i16>, <16 x i16>* %vp 1519 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1520 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1521 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1522 ret <16 x i16> %res 1523} 1524 1525define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1526; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask4: 1527; CHECK: # %bb.0: 
1528; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1529; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] 1530; CHECK-NEXT: retq 1531 %vec = load <16 x i16>, <16 x i16>* %vp 1532 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 1533 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1534 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1535 ret <16 x i16> %res 1536} 1537 1538define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { 1539; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: 1540; CHECK: # %bb.0: 1541; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1542; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] 1543; CHECK-NEXT: retq 1544 %vec = load <16 x i16>, <16 x i16>* %vp 1545 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 1546 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1547 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1548 ret <16 x i16> %res 1549} 1550 1551define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1552; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask5: 1553; CHECK: # %bb.0: 1554; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1555; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] 1556; CHECK-NEXT: retq 1557 %vec = load <16 x i16>, <16 x i16>* %vp 1558 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1559 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1560 %res = select <16 x i1> %cmp, 
<16 x i16> %shuf, <16 x i16> %vec2 1561 ret <16 x i16> %res 1562} 1563 1564define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { 1565; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: 1566; CHECK: # %bb.0: 1567; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1568; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] 1569; CHECK-NEXT: retq 1570 %vec = load <16 x i16>, <16 x i16>* %vp 1571 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1572 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1573 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1574 ret <16 x i16> %res 1575} 1576 1577define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { 1578; CHECK-LABEL: test_16xi16_perm_high_mem_mask6: 1579; CHECK: # %bb.0: 1580; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] 1581; CHECK-NEXT: retq 1582 %vec = load <16 x i16>, <16 x i16>* %vp 1583 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 1584 ret <16 x i16> %res 1585} 1586define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1587; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask6: 1588; CHECK: # %bb.0: 1589; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1590; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] 1591; CHECK-NEXT: retq 1592 %vec = load <16 x i16>, <16 x i16>* %vp 1593 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 1594 %cmp = icmp eq <16 x i16> %mask, 
zeroinitializer 1595 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1596 ret <16 x i16> %res 1597} 1598 1599define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { 1600; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: 1601; CHECK: # %bb.0: 1602; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1603; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] 1604; CHECK-NEXT: retq 1605 %vec = load <16 x i16>, <16 x i16>* %vp 1606 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 1607 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1608 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1609 ret <16 x i16> %res 1610} 1611 1612define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1613; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask7: 1614; CHECK: # %bb.0: 1615; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1616; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] 1617; CHECK-NEXT: retq 1618 %vec = load <16 x i16>, <16 x i16>* %vp 1619 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1620 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1621 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1622 ret <16 x i16> %res 1623} 1624 1625define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { 1626; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: 1627; CHECK: # %bb.0: 1628; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1629; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] 1630; CHECK-NEXT: retq 1631 %vec = load 
<16 x i16>, <16 x i16>* %vp 1632 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1633 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1634 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1635 ret <16 x i16> %res 1636} 1637 1638define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { 1639; CHECK-LABEL: test_32xi16_perm_high_mask0: 1640; CHECK: # %bb.0: 1641; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] 1642; CHECK-NEXT: retq 1643 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 1644 ret <32 x i16> %res 1645} 1646define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1647; CHECK-LABEL: test_masked_32xi16_perm_high_mask0: 1648; CHECK: # %bb.0: 1649; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1650; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] 1651; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1652; CHECK-NEXT: retq 1653 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 1654 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1655 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1656 ret <32 x i16> %res 1657} 1658 1659define <32 x i16> 
@test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { 1660; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask0: 1661; CHECK: # %bb.0: 1662; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1663; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] 1664; CHECK-NEXT: retq 1665 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 1666 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1667 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1668 ret <32 x i16> %res 1669} 1670define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1671; CHECK-LABEL: test_masked_32xi16_perm_low_mask1: 1672; CHECK: # %bb.0: 1673; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1674; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] 1675; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1676; CHECK-NEXT: retq 1677 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 1678 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1679 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1680 ret <32 x i16> %res 1681} 1682 1683define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { 1684; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask1: 1685; CHECK: # %bb.0: 1686; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1687; 
CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] 1688; CHECK-NEXT: retq 1689 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 1690 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1691 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1692 ret <32 x i16> %res 1693} 1694define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1695; CHECK-LABEL: test_masked_32xi16_perm_high_mask2: 1696; CHECK: # %bb.0: 1697; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1698; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] 1699; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1700; CHECK-NEXT: retq 1701 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 1702 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1703 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1704 ret <32 x i16> %res 1705} 1706 1707define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { 1708; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask2: 1709; CHECK: # %bb.0: 1710; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1711; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] 1712; CHECK-NEXT: retq 1713 %shuf = shufflevector <32 x i16> %vec, 
<32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 1714 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1715 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1716 ret <32 x i16> %res 1717} 1718define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { 1719; CHECK-LABEL: test_32xi16_perm_low_mask3: 1720; CHECK: # %bb.0: 1721; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] 1722; CHECK-NEXT: retq 1723 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 1724 ret <32 x i16> %res 1725} 1726define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1727; CHECK-LABEL: test_masked_32xi16_perm_low_mask3: 1728; CHECK: # %bb.0: 1729; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1730; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] 1731; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1732; CHECK-NEXT: retq 1733 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 1734 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1735 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1736 ret <32 x i16> %res 
1737} 1738 1739define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { 1740; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask3: 1741; CHECK: # %bb.0: 1742; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1743; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] 1744; CHECK-NEXT: retq 1745 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 1746 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1747 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1748 ret <32 x i16> %res 1749} 1750define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1751; CHECK-LABEL: test_masked_32xi16_perm_high_mask4: 1752; CHECK: # %bb.0: 1753; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1754; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] 1755; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1756; CHECK-NEXT: retq 1757 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 1758 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1759 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1760 ret <32 x i16> %res 1761} 1762 1763define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { 1764; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask4: 1765; CHECK: # %bb.0: 1766; 
CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1767; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] 1768; CHECK-NEXT: retq 1769 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 1770 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1771 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1772 ret <32 x i16> %res 1773} 1774define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1775; CHECK-LABEL: test_masked_32xi16_perm_low_mask5: 1776; CHECK: # %bb.0: 1777; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1778; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] 1779; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1780; CHECK-NEXT: retq 1781 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 1782 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1783 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1784 ret <32 x i16> %res 1785} 1786 1787define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { 1788; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask5: 1789; CHECK: # %bb.0: 1790; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1791; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] 1792; CHECK-NEXT: retq 
1793 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 1794 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1795 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1796 ret <32 x i16> %res 1797} 1798define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { 1799; CHECK-LABEL: test_32xi16_perm_high_mask6: 1800; CHECK: # %bb.0: 1801; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] 1802; CHECK-NEXT: retq 1803 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 1804 ret <32 x i16> %res 1805} 1806define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1807; CHECK-LABEL: test_masked_32xi16_perm_high_mask6: 1808; CHECK: # %bb.0: 1809; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1810; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] 1811; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1812; CHECK-NEXT: retq 1813 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 1814 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1815 %res = select <32 x i1> %cmp, <32 x i16> %shuf, 
<32 x i16> %vec2 1816 ret <32 x i16> %res 1817} 1818 1819define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { 1820; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask6: 1821; CHECK: # %bb.0: 1822; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1823; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] 1824; CHECK-NEXT: retq 1825 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 1826 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1827 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1828 ret <32 x i16> %res 1829} 1830define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1831; CHECK-LABEL: test_masked_32xi16_perm_low_mask7: 1832; CHECK: # %bb.0: 1833; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1834; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] 1835; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1836; CHECK-NEXT: retq 1837 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 1838 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1839 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1840 ret <32 x i16> %res 1841} 1842 1843define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { 1844; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask7: 
1845; CHECK: # %bb.0: 1846; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1847; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] 1848; CHECK-NEXT: retq 1849 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 1850 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1851 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1852 ret <32 x i16> %res 1853} 1854define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { 1855; CHECK-LABEL: test_32xi16_perm_high_mem_mask0: 1856; CHECK: # %bb.0: 1857; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] 1858; CHECK-NEXT: retq 1859 %vec = load <32 x i16>, <32 x i16>* %vp 1860 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 1861 ret <32 x i16> %res 1862} 1863define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1864; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask0: 1865; CHECK: # %bb.0: 1866; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1867; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] 1868; CHECK-NEXT: retq 1869 %vec = load <32 x i16>, <32 x i16>* %vp 1870 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 
6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 1871 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1872 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1873 ret <32 x i16> %res 1874} 1875 1876define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { 1877; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: 1878; CHECK: # %bb.0: 1879; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1880; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] 1881; CHECK-NEXT: retq 1882 %vec = load <32 x i16>, <32 x i16>* %vp 1883 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 1884 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1885 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1886 ret <32 x i16> %res 1887} 1888 1889define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1890; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask1: 1891; CHECK: # %bb.0: 1892; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1893; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] 1894; CHECK-NEXT: retq 1895 %vec = load <32 x i16>, <32 x i16>* %vp 1896 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, 
i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31> 1897 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1898 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1899 ret <32 x i16> %res 1900} 1901 1902define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { 1903; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: 1904; CHECK: # %bb.0: 1905; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1906; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] 1907; CHECK-NEXT: retq 1908 %vec = load <32 x i16>, <32 x i16>* %vp 1909 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31> 1910 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1911 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1912 ret <32 x i16> %res 1913} 1914 1915define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1916; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask2: 1917; CHECK: # %bb.0: 1918; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1919; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] 1920; CHECK-NEXT: retq 1921 %vec = load <32 x i16>, <32 x i16>* %vp 1922 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28> 1923 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1924 %res = select <32 x 
i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1925 ret <32 x i16> %res 1926} 1927 1928define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { 1929; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: 1930; CHECK: # %bb.0: 1931; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1932; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] 1933; CHECK-NEXT: retq 1934 %vec = load <32 x i16>, <32 x i16>* %vp 1935 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28> 1936 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1937 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1938 ret <32 x i16> %res 1939} 1940 1941define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { 1942; CHECK-LABEL: test_32xi16_perm_low_mem_mask3: 1943; CHECK: # %bb.0: 1944; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] 1945; CHECK-NEXT: retq 1946 %vec = load <32 x i16>, <32 x i16>* %vp 1947 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 1948 ret <32 x i16> %res 1949} 1950define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1951; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask3: 1952; CHECK: # %bb.0: 1953; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1954; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 
{%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] 1955; CHECK-NEXT: retq 1956 %vec = load <32 x i16>, <32 x i16>* %vp 1957 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 1958 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1959 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1960 ret <32 x i16> %res 1961} 1962 1963define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { 1964; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: 1965; CHECK: # %bb.0: 1966; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1967; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] 1968; CHECK-NEXT: retq 1969 %vec = load <32 x i16>, <32 x i16>* %vp 1970 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 1971 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1972 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1973 ret <32 x i16> %res 1974} 1975 1976define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1977; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask4: 1978; CHECK: # %bb.0: 1979; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1980; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] 1981; CHECK-NEXT: retq 1982 %vec = 
load <32 x i16>, <32 x i16>* %vp 1983 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29> 1984 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1985 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1986 ret <32 x i16> %res 1987} 1988 1989define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { 1990; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: 1991; CHECK: # %bb.0: 1992; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1993; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] 1994; CHECK-NEXT: retq 1995 %vec = load <32 x i16>, <32 x i16>* %vp 1996 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29> 1997 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1998 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1999 ret <32 x i16> %res 2000} 2001 2002define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 2003; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask5: 2004; CHECK: # %bb.0: 2005; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] 2006; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 2007; CHECK-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} 2008; CHECK-NEXT: retq 2009 %vec = load <32 x i16>, <32 x i16>* %vp 2010 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31> 2011 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2012 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 2013 ret <32 x i16> %res 2014} 2015 2016define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) { 2017; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: 2018; CHECK: # %bb.0: 2019; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] 2020; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 2021; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} 2022; CHECK-NEXT: retq 2023 %vec = load <32 x i16>, <32 x i16>* %vp 2024 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31> 2025 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2026 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 2027 ret <32 x i16> %res 2028} 2029 2030define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { 2031; CHECK-LABEL: test_32xi16_perm_high_mem_mask6: 2032; CHECK: # %bb.0: 2033; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] 2034; CHECK-NEXT: retq 2035 %vec = load <32 x i16>, <32 x i16>* %vp 2036 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 2037 ret <32 x i16> %res 2038} 2039define <32 
x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 2040; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask6: 2041; CHECK: # %bb.0: 2042; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 2043; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] 2044; CHECK-NEXT: retq 2045 %vec = load <32 x i16>, <32 x i16>* %vp 2046 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 2047 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2048 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 2049 ret <32 x i16> %res 2050} 2051 2052define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { 2053; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: 2054; CHECK: # %bb.0: 2055; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 2056; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] 2057; CHECK-NEXT: retq 2058 %vec = load <32 x i16>, <32 x i16>* %vp 2059 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 2060 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2061 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 2062 ret <32 x i16> %res 2063} 2064 2065define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 2066; CHECK-LABEL: 
test_masked_32xi16_perm_low_mem_mask7: 2067; CHECK: # %bb.0: 2068; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 2069; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] 2070; CHECK-NEXT: retq 2071 %vec = load <32 x i16>, <32 x i16>* %vp 2072 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 2073 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2074 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 2075 ret <32 x i16> %res 2076} 2077 2078define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { 2079; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: 2080; CHECK: # %bb.0: 2081; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 2082; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] 2083; CHECK-NEXT: retq 2084 %vec = load <32 x i16>, <32 x i16>* %vp 2085 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 2086 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2087 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 2088 ret <32 x i16> %res 2089} 2090 2091define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) { 2092; CHECK-LABEL: test_4xi32_perm_mask0: 2093; CHECK: # %bb.0: 2094; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] 2095; CHECK-NEXT: retq 2096 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x 
i32> <i32 2, i32 3, i32 3, i32 0> 2097 ret <4 x i32> %res 2098} 2099define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2100; CHECK-LABEL: test_masked_4xi32_perm_mask0: 2101; CHECK: # %bb.0: 2102; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2103; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] 2104; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2105; CHECK-NEXT: retq 2106 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 2107 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2108 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2109 ret <4 x i32> %res 2110} 2111 2112define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) { 2113; CHECK-LABEL: test_masked_z_4xi32_perm_mask0: 2114; CHECK: # %bb.0: 2115; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2116; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] 2117; CHECK-NEXT: retq 2118 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 2119 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2120 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2121 ret <4 x i32> %res 2122} 2123define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2124; CHECK-LABEL: test_masked_4xi32_perm_mask1: 2125; CHECK: # %bb.0: 2126; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2127; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] 2128; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2129; CHECK-NEXT: retq 2130 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0> 2131 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2132 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2133 ret <4 x i32> %res 2134} 2135 2136define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) { 2137; CHECK-LABEL: test_masked_z_4xi32_perm_mask1: 2138; CHECK: # %bb.0: 2139; CHECK-NEXT: 
vptestnmd %xmm1, %xmm1, %k1 2140; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] 2141; CHECK-NEXT: retq 2142 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0> 2143 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2144 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2145 ret <4 x i32> %res 2146} 2147define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2148; CHECK-LABEL: test_masked_4xi32_perm_mask2: 2149; CHECK: # %bb.0: 2150; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2151; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] 2152; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2153; CHECK-NEXT: retq 2154 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0> 2155 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2156 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2157 ret <4 x i32> %res 2158} 2159 2160define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) { 2161; CHECK-LABEL: test_masked_z_4xi32_perm_mask2: 2162; CHECK: # %bb.0: 2163; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2164; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] 2165; CHECK-NEXT: retq 2166 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0> 2167 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2168 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2169 ret <4 x i32> %res 2170} 2171define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) { 2172; CHECK-LABEL: test_4xi32_perm_mask3: 2173; CHECK: # %bb.0: 2174; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] 2175; CHECK-NEXT: retq 2176 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 2177 ret <4 x i32> %res 2178} 2179define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2180; CHECK-LABEL: 
test_masked_4xi32_perm_mask3: 2181; CHECK: # %bb.0: 2182; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2183; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] 2184; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2185; CHECK-NEXT: retq 2186 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 2187 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2188 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2189 ret <4 x i32> %res 2190} 2191 2192define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) { 2193; CHECK-LABEL: test_masked_z_4xi32_perm_mask3: 2194; CHECK: # %bb.0: 2195; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2196; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] 2197; CHECK-NEXT: retq 2198 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 2199 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2200 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2201 ret <4 x i32> %res 2202} 2203define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { 2204; CHECK-LABEL: test_4xi32_perm_mem_mask0: 2205; CHECK: # %bb.0: 2206; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] 2207; CHECK-NEXT: retq 2208 %vec = load <4 x i32>, <4 x i32>* %vp 2209 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 2210 ret <4 x i32> %res 2211} 2212define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2213; CHECK-LABEL: test_masked_4xi32_perm_mem_mask0: 2214; CHECK: # %bb.0: 2215; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2216; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] 2217; CHECK-NEXT: retq 2218 %vec = load <4 x i32>, <4 x i32>* %vp 2219 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 2220 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2221 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2222 ret 
<4 x i32> %res 2223} 2224 2225define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) { 2226; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask0: 2227; CHECK: # %bb.0: 2228; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2229; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] 2230; CHECK-NEXT: retq 2231 %vec = load <4 x i32>, <4 x i32>* %vp 2232 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 2233 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2234 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2235 ret <4 x i32> %res 2236} 2237 2238define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2239; CHECK-LABEL: test_masked_4xi32_perm_mem_mask1: 2240; CHECK: # %bb.0: 2241; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2242; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] 2243; CHECK-NEXT: retq 2244 %vec = load <4 x i32>, <4 x i32>* %vp 2245 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 2246 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2247 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2248 ret <4 x i32> %res 2249} 2250 2251define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { 2252; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask1: 2253; CHECK: # %bb.0: 2254; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2255; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] 2256; CHECK-NEXT: retq 2257 %vec = load <4 x i32>, <4 x i32>* %vp 2258 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 2259 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2260 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2261 ret <4 x i32> %res 2262} 2263 2264define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2265; CHECK-LABEL: 
test_masked_4xi32_perm_mem_mask2: 2266; CHECK: # %bb.0: 2267; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2268; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] 2269; CHECK-NEXT: retq 2270 %vec = load <4 x i32>, <4 x i32>* %vp 2271 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 2272 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2273 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2274 ret <4 x i32> %res 2275} 2276 2277define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { 2278; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask2: 2279; CHECK: # %bb.0: 2280; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2281; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] 2282; CHECK-NEXT: retq 2283 %vec = load <4 x i32>, <4 x i32>* %vp 2284 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 2285 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2286 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2287 ret <4 x i32> %res 2288} 2289 2290define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { 2291; CHECK-LABEL: test_4xi32_perm_mem_mask3: 2292; CHECK: # %bb.0: 2293; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] 2294; CHECK-NEXT: retq 2295 %vec = load <4 x i32>, <4 x i32>* %vp 2296 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 2297 ret <4 x i32> %res 2298} 2299define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2300; CHECK-LABEL: test_masked_4xi32_perm_mem_mask3: 2301; CHECK: # %bb.0: 2302; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2303; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] 2304; CHECK-NEXT: retq 2305 %vec = load <4 x i32>, <4 x i32>* %vp 2306 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 2307 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2308 %res = 
select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2309 ret <4 x i32> %res 2310} 2311 2312define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { 2313; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask3: 2314; CHECK: # %bb.0: 2315; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2316; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] 2317; CHECK-NEXT: retq 2318 %vec = load <4 x i32>, <4 x i32>* %vp 2319 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 2320 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2321 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2322 ret <4 x i32> %res 2323} 2324 2325define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { 2326; CHECK-LABEL: test_8xi32_perm_mask0: 2327; CHECK: # %bb.0: 2328; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] 2329; CHECK-NEXT: retq 2330 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 2331 ret <8 x i32> %res 2332} 2333define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2334; CHECK-LABEL: test_masked_8xi32_perm_mask0: 2335; CHECK: # %bb.0: 2336; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2337; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] 2338; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2339; CHECK-NEXT: retq 2340 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 2341 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2342 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2343 ret <8 x i32> %res 2344} 2345 2346define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { 2347; CHECK-LABEL: test_masked_z_8xi32_perm_mask0: 2348; CHECK: # %bb.0: 2349; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2350; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] 2351; 
CHECK-NEXT: retq 2352 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 2353 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2354 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2355 ret <8 x i32> %res 2356} 2357define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2358; CHECK-LABEL: test_masked_8xi32_perm_mask1: 2359; CHECK: # %bb.0: 2360; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2361; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] 2362; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2363; CHECK-NEXT: retq 2364 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 2365 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2366 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2367 ret <8 x i32> %res 2368} 2369 2370define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { 2371; CHECK-LABEL: test_masked_z_8xi32_perm_mask1: 2372; CHECK: # %bb.0: 2373; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2374; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] 2375; CHECK-NEXT: retq 2376 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 2377 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2378 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2379 ret <8 x i32> %res 2380} 2381define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2382; CHECK-LABEL: test_masked_8xi32_perm_mask2: 2383; CHECK: # %bb.0: 2384; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2385; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] 2386; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2387; CHECK-NEXT: retq 2388 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 
4, i32 7> 2389 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2390 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2391 ret <8 x i32> %res 2392} 2393 2394define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { 2395; CHECK-LABEL: test_masked_z_8xi32_perm_mask2: 2396; CHECK: # %bb.0: 2397; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2398; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] 2399; CHECK-NEXT: retq 2400 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 2401 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2402 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2403 ret <8 x i32> %res 2404} 2405define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { 2406; CHECK-LABEL: test_8xi32_perm_mask3: 2407; CHECK: # %bb.0: 2408; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] 2409; CHECK-NEXT: retq 2410 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 2411 ret <8 x i32> %res 2412} 2413define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2414; CHECK-LABEL: test_masked_8xi32_perm_mask3: 2415; CHECK: # %bb.0: 2416; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2417; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] 2418; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2419; CHECK-NEXT: retq 2420 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 2421 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2422 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2423 ret <8 x i32> %res 2424} 2425 2426define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 2427; CHECK-LABEL: test_masked_z_8xi32_perm_mask3: 2428; CHECK: # %bb.0: 2429; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2430; CHECK-NEXT: vpshufd 
{{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] 2431; CHECK-NEXT: retq 2432 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 2433 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2434 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2435 ret <8 x i32> %res 2436} 2437define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 2438; CHECK-LABEL: test_8xi32_perm_mem_mask0: 2439; CHECK: # %bb.0: 2440; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] 2441; CHECK-NEXT: retq 2442 %vec = load <8 x i32>, <8 x i32>* %vp 2443 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 2444 ret <8 x i32> %res 2445} 2446define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2447; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0: 2448; CHECK: # %bb.0: 2449; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2450; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] 2451; CHECK-NEXT: retq 2452 %vec = load <8 x i32>, <8 x i32>* %vp 2453 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 2454 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2455 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2456 ret <8 x i32> %res 2457} 2458 2459define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 2460; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0: 2461; CHECK: # %bb.0: 2462; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2463; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] 2464; CHECK-NEXT: retq 2465 %vec = load <8 x i32>, <8 x i32>* %vp 2466 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 2467 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2468 %res = select <8 x i1> %cmp, <8 x 
i32> %shuf, <8 x i32> zeroinitializer 2469 ret <8 x i32> %res 2470} 2471 2472define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2473; CHECK-LABEL: test_masked_8xi32_perm_mem_mask1: 2474; CHECK: # %bb.0: 2475; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2476; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] 2477; CHECK-NEXT: retq 2478 %vec = load <8 x i32>, <8 x i32>* %vp 2479 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 2480 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2481 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2482 ret <8 x i32> %res 2483} 2484 2485define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 2486; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1: 2487; CHECK: # %bb.0: 2488; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2489; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] 2490; CHECK-NEXT: retq 2491 %vec = load <8 x i32>, <8 x i32>* %vp 2492 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 2493 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2494 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2495 ret <8 x i32> %res 2496} 2497 2498define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2499; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2: 2500; CHECK: # %bb.0: 2501; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2502; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] 2503; CHECK-NEXT: retq 2504 %vec = load <8 x i32>, <8 x i32>* %vp 2505 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 2506 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2507 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2508 ret <8 x i32> %res 
2509} 2510 2511define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 2512; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2: 2513; CHECK: # %bb.0: 2514; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2515; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] 2516; CHECK-NEXT: retq 2517 %vec = load <8 x i32>, <8 x i32>* %vp 2518 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 2519 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2520 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2521 ret <8 x i32> %res 2522} 2523 2524define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 2525; CHECK-LABEL: test_8xi32_perm_mem_mask3: 2526; CHECK: # %bb.0: 2527; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] 2528; CHECK-NEXT: retq 2529 %vec = load <8 x i32>, <8 x i32>* %vp 2530 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 2531 ret <8 x i32> %res 2532} 2533define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2534; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3: 2535; CHECK: # %bb.0: 2536; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2537; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] 2538; CHECK-NEXT: retq 2539 %vec = load <8 x i32>, <8 x i32>* %vp 2540 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 2541 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2542 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2543 ret <8 x i32> %res 2544} 2545 2546define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 2547; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3: 2548; CHECK: # %bb.0: 2549; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2550; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = 
mem[3,2,0,0,7,6,4,4] 2551; CHECK-NEXT: retq 2552 %vec = load <8 x i32>, <8 x i32>* %vp 2553 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 2554 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2555 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2556 ret <8 x i32> %res 2557} 2558 2559define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) { 2560; CHECK-LABEL: test_16xi32_perm_mask0: 2561; CHECK: # %bb.0: 2562; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] 2563; CHECK-NEXT: retq 2564 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 2565 ret <16 x i32> %res 2566} 2567define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 2568; CHECK-LABEL: test_masked_16xi32_perm_mask0: 2569; CHECK: # %bb.0: 2570; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 2571; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] 2572; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 2573; CHECK-NEXT: retq 2574 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 2575 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2576 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 2577 ret <16 x i32> %res 2578} 2579 2580define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 2581; CHECK-LABEL: test_masked_z_16xi32_perm_mask0: 2582; CHECK: # %bb.0: 2583; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2584; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] 2585; CHECK-NEXT: retq 2586 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 
3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 2587 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2588 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2589 ret <16 x i32> %res 2590} 2591define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 2592; CHECK-LABEL: test_masked_16xi32_perm_mask1: 2593; CHECK: # %bb.0: 2594; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 2595; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] 2596; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 2597; CHECK-NEXT: retq 2598 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12> 2599 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2600 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 2601 ret <16 x i32> %res 2602} 2603 2604define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { 2605; CHECK-LABEL: test_masked_z_16xi32_perm_mask1: 2606; CHECK: # %bb.0: 2607; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2608; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] 2609; CHECK-NEXT: retq 2610 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12> 2611 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2612 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2613 ret <16 x i32> %res 2614} 2615define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 2616; CHECK-LABEL: test_masked_16xi32_perm_mask2: 2617; CHECK: # %bb.0: 2618; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 2619; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = 
zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] 2620; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 2621; CHECK-NEXT: retq 2622 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12> 2623 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2624 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 2625 ret <16 x i32> %res 2626} 2627 2628define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { 2629; CHECK-LABEL: test_masked_z_16xi32_perm_mask2: 2630; CHECK: # %bb.0: 2631; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2632; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] 2633; CHECK-NEXT: retq 2634 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12> 2635 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2636 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 2637 ret <16 x i32> %res 2638} 2639define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { 2640; CHECK-LABEL: test_16xi32_perm_mask3: 2641; CHECK: # %bb.0: 2642; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] 2643; CHECK-NEXT: retq 2644 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 2645 ret <16 x i32> %res 2646} 2647define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 2648; CHECK-LABEL: test_masked_16xi32_perm_mask3: 2649; CHECK: # %bb.0: 2650; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 2651; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] 2652; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 2653; CHECK-NEXT: retq 
2654 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 2655 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 2656 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 2657 ret <16 x i32> %res 2658}

; Zero-masked register form of the [3,2,0,3] permute (the same pattern is
; repeated for every group of four dwords). Elements whose %mask entry is zero
; take the permuted value; all other elements are zeroed.
define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT: retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}

; Unmasked [1,0,1,3] permute whose source vector is loaded from %vp. The
; expected assembly takes the source as a memory operand and uses vpermilps.
define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %out = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
  ret <16 x i32> %out
}

; Merge-masked [1,0,1,3] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest keep %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %out
}

; Zero-masked [1,0,1,3] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest are zeroed.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}

; Merge-masked [1,0,0,2] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest keep %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %out
}

; Zero-masked [1,0,0,2] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest are zeroed.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}

; Merge-masked [2,0,1,2] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest keep %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %out
}

; Zero-masked [2,0,1,2] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest are zeroed.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}

; Unmasked [3,1,1,1] permute whose source vector is loaded from %vp. The
; expected assembly takes the source as a memory operand and uses vpermilps.
define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %out = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
  ret <16 x i32> %out
}

; Merge-masked [3,1,1,1] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest keep %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %out
}

; Zero-masked [3,1,1,1] permute of a vector loaded from %vp. Elements whose
; %mask entry is zero take the permuted value; the rest are zeroed.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT: retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}
