; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps (%rsi), %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; SSE2-NEXT:    pslld $7, %xmm0
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps (%rsi), %xmm0
; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX2-NEXT:    retq
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in
  %shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
  %trunc = trunc <4 x i64> %shl to <4 x i32>
  store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
  ret void
}

define <8 x i16> @trunc_shl_15_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_15_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    psllw $15, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_15_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

define <8 x i16> @trunc_shl_16_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_16_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_16_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_17_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_17_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_31_i32_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl (%rsi), %eax
; SSE2-NEXT:    shll $31, %eax
; SSE2-NEXT:    movl %eax, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_31_i32_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl (%rsi), %eax
; AVX2-NEXT:    shll $31, %eax
; AVX2-NEXT:    movl %eax, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 31
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_32_i32_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_32_i32_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 32
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_15_i16_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl (%rsi), %eax
; SSE2-NEXT:    shll $15, %eax
; SSE2-NEXT:    movw %ax, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_15_i16_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl (%rsi), %eax
; AVX2-NEXT:    shll $15, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 15
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_16_i16_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movw $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_16_i16_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movw $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 16
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movb (%rsi), %al
; SSE2-NEXT:    shlb $7, %al
; SSE2-NEXT:    movb %al, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movb (%rsi), %al
; AVX2-NEXT:    shlb $7, %al
; AVX2-NEXT:    movb %al, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 7
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}

define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_8_i8_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movb $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_8_i8_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movb $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 8
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}