; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

; These tests verify that a vector sdiv by a splat power-of-two constant is
; lowered to a shift sequence instead of a real division: an arithmetic
; shift to extract the sign, a logical shift to build the rounding bias,
; an add, then the final arithmetic shift by log2(divisor).

; <8 x i16> splat divisor 32 (2^5): sra 15 / srl 11 / add / sra 5.
define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    psraw $5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

; Same input as above but with minsize: the shift sequence is still used
; (expected codegen is identical to the non-minsize case).
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    psraw $5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

; <4 x i32> splat divisor 16 (2^4): sra 31 / srl 28 / add / sra 4.
define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
%0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
ret <4 x i32> %0
}

; Negative splat divisor -16: same shift sequence as the +16 case,
; followed by a negation (subtract from zero).
define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_negative:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
%0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
ret <4 x i32> %0
}

; 256-bit <8 x i32> splat divisor 64 (2^6). SSE and AVX1 process the two
; 128-bit halves separately (AVX1 via extract/insert); AVX2 uses full-width
; ymm shifts.
define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm0, %xmm2
; SSE-NEXT:    psrad $6, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrld $26, %xmm3
; SSE-NEXT:    paddd %xmm1, %xmm3
; SSE-NEXT:    psrad $6, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
%0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
ret <8 x i32> %0
}

; 256-bit <16 x i16> splat divisor 4 (2^2); same per-half vs. full-width
; split between SSE/AVX1 and AVX2 as in sdiv8x32.
define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm0, %xmm2
; SSE-NEXT:    psraw $2, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psraw $15, %xmm3
; SSE-NEXT:    psrlw $14, %xmm3
; SSE-NEXT:    paddw %xmm1, %xmm3
; SSE-NEXT:    psraw $2, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %a0
}

; Div-by-0 in any lane is UB.
; Lanes 1-3 of the divisor below are zero, so the sdiv is immediate UB and
; the optimizer may (and does) fold the whole operation away — expected
; codegen is a bare ret.
define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %y
}