; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: psrlw $11, %xmm1
; SSE-NEXT: paddw %xmm0, %xmm1
; SSE-NEXT: psraw $5, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: psrlw $11, %xmm1
; SSE-NEXT: paddw %xmm0, %xmm1
; SSE-NEXT: psraw $5, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

define <4 x i32> @sdiv_zero(<4 x i32> %var) {
; SSE-LABEL: sdiv_zero:
; SSE: # BB#0: # %entry
; SSE-NEXT: pextrd $1, %xmm0, %eax
; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: cltd
; SSE-NEXT: idivl %esi
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %esi
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: pinsrd $1, %ecx, %xmm1
; SSE-NEXT: pextrd $2, %xmm0, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %esi
; SSE-NEXT: pinsrd $2, %eax, %xmm1
; SSE-NEXT: pextrd $3, %xmm0, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %esi
; SSE-NEXT: pinsrd $3, %eax, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_zero:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpextrd $1, %xmm0, %eax
; AVX-NEXT: xorl %esi, %esi
; AVX-NEXT: cltd
; AVX-NEXT: idivl %esi
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %esi
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-NEXT: vpextrd $2, %xmm0, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %esi
; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrd $3, %xmm0, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %esi
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %0
}

define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $28, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrad $4, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %0
}

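; Division by a splat of a negative power of two should produce the same shift
; sequence as above, followed by a negation of the result (pxor/psubd, or
; vpxor/vpsubd on AVX).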
define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $28, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrad $4, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_negative:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
  ret <4 x i32> %0
}

define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: psrld $26, %xmm2
; SSE-NEXT: paddd %xmm0, %xmm2
; SSE-NEXT: psrad $6, %xmm2
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: psrld $26, %xmm3
; SSE-NEXT: paddd %xmm1, %xmm3
; SSE-NEXT: psrad $6, %xmm3
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT: vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT: vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <8 x i32> %0
}

define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: psrlw $14, %xmm2
; SSE-NEXT: paddw %xmm0, %xmm2
; SSE-NEXT: psraw $2, %xmm2
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psraw $15, %xmm3
; SSE-NEXT: psrlw $14, %xmm3
; SSE-NEXT: paddw %xmm1, %xmm3
; SSE-NEXT: psraw $2, %xmm3
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT: vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT: vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT: vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %a0
}

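; A non-splat divisor defeats the vector shift lowering: the divide is
; scalarized, and only the lane dividing by 2 is strength-reduced to a
; shift-and-add sequence; the remaining lanes fall back to idivl.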
define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE: # BB#0:
; SSE-NEXT: pextrd $1, %xmm0, %eax
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: movd %xmm0, %edx
; SSE-NEXT: movl %edx, %esi
; SSE-NEXT: shrl $31, %esi
; SSE-NEXT: addl %edx, %esi
; SSE-NEXT: sarl %esi
; SSE-NEXT: movd %esi, %xmm1
; SSE-NEXT: pinsrd $1, %eax, %xmm1
; SSE-NEXT: pextrd $2, %xmm0, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: pinsrd $2, %eax, %xmm1
; SSE-NEXT: pextrd $3, %xmm0, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: pinsrd $3, %eax, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX: # BB#0:
; AVX-NEXT: vpextrd $1, %xmm0, %eax
; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vmovd %xmm0, %edx
; AVX-NEXT: movl %edx, %esi
; AVX-NEXT: shrl $31, %esi
; AVX-NEXT: addl %edx, %esi
; AVX-NEXT: sarl %esi
; AVX-NEXT: vmovd %esi, %xmm1
; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrd $2, %xmm0, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrd $3, %xmm0, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %y
}