; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=AVX256
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=AVX512F

; Each function below compares one or two <8 x i32> loads against zero and
; then sign- or zero-extends the resulting i1 mask vector to i8 or i16
; elements. The expected assembly is checked for avx512vl and for avx512f,
; each with the prefer-256-bit attribute turned on and off; the two avx512f
; runs share one set of expectations.

; 8-lane compare-with-zero mask, sign-extended to <8 x i16>.
define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_sext_v8i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vmovdqa (%rdi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv8i1_sext_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv8i1_sext_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %ext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ext
}

; Two 8-lane compare-with-zero masks (%p first, then %q) concatenated into
; <16 x i1>, sign-extended to <16 x i8>.
define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i8:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vmovdqa (%rdi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT:    vmovdqa (%rsi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_sext_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = sext <16 x i1> %concat to <16 x i8>
  ret <16 x i8> %ext
}

; Two 8-lane compare-with-zero masks (%p first, then %q) concatenated into
; <16 x i1>, sign-extended to <16 x i16>.
define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vmovdqa (%rdi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT:    vmovdqa (%rsi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_sext_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = sext <16 x i1> %concat to <16 x i16>
  ret <16 x i16> %ext
}

; 8-lane compare-with-zero mask, zero-extended to <8 x i16>.
define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_zext_v8i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vmovdqa (%rdi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv8i1_zext_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv8i1_zext_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %ext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ext
}

; Two 8-lane compare-with-zero masks (%p first, then %q) concatenated into
; <16 x i1>, zero-extended to <16 x i8>.
define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i8:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vmovdqa (%rdi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT:    vmovdqa (%rsi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_zext_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = zext <16 x i1> %concat to <16 x i8>
  ret <16 x i8> %ext
}

; Two 8-lane compare-with-zero masks (%p first, then %q) concatenated into
; <16 x i1>, zero-extended to <16 x i16>.
define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vmovdqa (%rdi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT:    vmovdqa (%rsi), %ymm0
; AVX256-NEXT:    vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_zext_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = zext <16 x i1> %concat to <16 x i16>
  ret <16 x i16> %ext
}