; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw --show-mc-encoding | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW

define <4 x i32> @test_abs_gt_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_gt_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_ge_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_ge_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_ge_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_ge_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sge <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

define <8 x i16> @test_abs_gt_v8i16(<8 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psraw $15, %xmm1
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsw %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_gt_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i16> zeroinitializer, %a
  %b = icmp sgt <8 x i16> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
  ret <8 x i16> %abs
}

define <16 x i8> @test_abs_lt_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_lt_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsb %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_lt_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_lt_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsb %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_lt_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsb %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i8> zeroinitializer, %a
  %b = icmp slt <16 x i8> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
  ret <16 x i8> %abs
}

define <4 x i32> @test_abs_le_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sle <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
  ret <4 x i32> %abs
}

define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    paddd %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    paddd %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v8i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_gt_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
  ret <8 x i32> %abs
}

define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    paddd %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    paddd %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_ge_v8i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_ge_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_ge_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_ge_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sge <8 x i32> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
  ret <8 x i32> %abs
}

define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psraw $15, %xmm2
; SSE2-NEXT:    paddw %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psraw $15, %xmm2
; SSE2-NEXT:    paddw %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v16i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsw %xmm0, %xmm0
; SSSE3-NEXT:    pabsw %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsw %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsw %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_gt_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsw %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i16> zeroinitializer, %a
  %b = icmp sgt <16 x i16> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg
  ret <16 x i16> %abs
}

define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm3
; SSE2-NEXT:    paddb %xmm3, %xmm0
; SSE2-NEXT:    pxor %xmm3, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    paddb %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_lt_v32i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsb %xmm0, %xmm0
; SSSE3-NEXT:    pabsb %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_lt_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsb %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_lt_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsb %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_lt_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsb %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <32 x i8> zeroinitializer, %a
  %b = icmp slt <32 x i8> %a, zeroinitializer
  %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a
  ret <32 x i8> %abs
}

define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    paddd %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    paddd %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_v8i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sle <8 x i32> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a
  ret <8 x i32> %abs
}

define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_16i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    paddd %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    paddd %xmm4, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    paddd %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    paddd %xmm4, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_16i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    pabsd %xmm2, %xmm2
; SSSE3-NEXT:    pabsd %xmm3, %xmm3
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vpabsd %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpabsd %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    vpabsd %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i32> zeroinitializer, %a
  %b = icmp sle <16 x i32> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i32> %tmp1neg, <16 x i32> %a
  ret <16 x i32> %abs
}

define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
; SSE-LABEL: test_abs_ge_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    paddq %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_abs_ge_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_ge_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_ge_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <2 x i64> zeroinitializer, %a
  %b = icmp sge <2 x i64> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i64> %a, <2 x i64> %tmp1neg
  ret <2 x i64> %abs
}

define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
; SSE-LABEL: test_abs_gt_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE-NEXT:    paddq %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE-NEXT:    paddq %xmm2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm4
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vxorps %ymm4, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_gt_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i64> zeroinitializer, %a
  %b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  %abs = select <4 x i1> %b, <4 x i64> %a, <4 x i64> %tmp1neg
  ret <4 x i64> %abs
}

define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
; SSE-LABEL: test_abs_le_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm0
; SSE-NEXT:    pxor %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm1
; SSE-NEXT:    pxor %xmm4, %xmm1
; SSE-NEXT:    movdqa %xmm2, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm2
; SSE-NEXT:    pxor %xmm4, %xmm2
; SSE-NEXT:    movdqa %xmm3, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm3
; SSE-NEXT:    pxor %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm6
; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vxorps %ymm6, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm5
; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-NEXT:    vpaddq %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpaddq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i64> zeroinitializer, %a
  %b = icmp sle <8 x i64> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
  ret <8 x i64> %abs
}

define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
; SSE-LABEL: test_abs_le_v8i64_fold:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    movdqu 16(%rdi), %xmm1
; SSE-NEXT:    movdqu 32(%rdi), %xmm2
; SSE-NEXT:    movdqu 48(%rdi), %xmm3
; SSE-NEXT:    movdqa %xmm0, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm0
; SSE-NEXT:    pxor %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm1
; SSE-NEXT:    pxor %xmm4, %xmm1
; SSE-NEXT:    movdqa %xmm2, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm2
; SSE-NEXT:    pxor %xmm4, %xmm2
; SSE-NEXT:    movdqa %xmm3, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE-NEXT:    paddq %xmm4, %xmm3
; SSE-NEXT:    pxor %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v8i64_fold:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
; AVX1-NEXT:    vmovdqu 32(%rdi), %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm6
; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vxorps %ymm6, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm5
; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v8i64_fold:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
; AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-NEXT:    vpaddq %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpaddq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v8i64_fold:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a = load <8 x i64>, <8 x i64>* %a.ptr, align 8
  %tmp1neg = sub <8 x i64> zeroinitializer, %a
  %b = icmp sle <8 x i64> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
  ret <8 x i64> %abs
}

define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm5
; SSE2-NEXT:    paddb %xmm5, %xmm0
; SSE2-NEXT:    pxor %xmm5, %xmm0
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm5
; SSE2-NEXT:    paddb %xmm5, %xmm1
; SSE2-NEXT:    pxor %xmm5, %xmm1
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    pcmpgtb %xmm2, %xmm5
; SSE2-NEXT:    paddb %xmm5, %xmm2
; SSE2-NEXT:    pxor %xmm5, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm3, %xmm4
; SSE2-NEXT:    paddb %xmm4, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_lt_v64i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsb %xmm0, %xmm0
; SSSE3-NEXT:    pabsb %xmm1, %xmm1
; SSSE3-NEXT:    pabsb %xmm2, %xmm2
; SSSE3-NEXT:    pabsb %xmm3, %xmm3
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_lt_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsb %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vpabsb %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpabsb %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_lt_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsb %ymm0, %ymm0
; AVX2-NEXT:    vpabsb %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_lt_v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsb %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512F-NEXT:    vpabsb %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc9]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsb %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <64 x i8> zeroinitializer, %a
  %b = icmp slt <64 x i8> %a, zeroinitializer
  %abs = select <64 x i1> %b, <64 x i8> %tmp1neg, <64 x i8> %a
  ret <64 x i8> %abs
}

define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v32i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psraw $15, %xmm4
; SSE2-NEXT:    paddw %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm4
; SSE2-NEXT:    psraw $15, %xmm4
; SSE2-NEXT:    paddw %xmm4, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    psraw $15, %xmm4
; SSE2-NEXT:    paddw %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    psraw $15, %xmm4
; SSE2-NEXT:    paddw %xmm4, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v32i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsw %xmm0, %xmm0
; SSSE3-NEXT:    pabsw %xmm1, %xmm1
; SSSE3-NEXT:    pabsw %xmm2, %xmm2
; SSSE3-NEXT:    pabsw %xmm3, %xmm3
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsw %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vpabsw %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpabsw %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsw %ymm0, %ymm0
; AVX2-NEXT:    vpabsw %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_gt_v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsw %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512F-NEXT:    vpabsw %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc9]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsw %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <32 x i16> zeroinitializer, %a
  %b = icmp sgt <32 x i16> %a, zeroinitializer
  %abs = select <32 x i1> %b, <32 x i16> %a, <32 x i16> %tmp1neg
  ret <32 x i16> %abs
}
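
; In the tests above, targets without a native pabs*/vpabs* instruction lower
; abs(x) through the sign-mask idiom visible in the SSE2/SSE blocks: with
; s = x arithmetically shifted right by (bitwidth - 1), abs(x) = (x + s) ^ s.
; Where no arithmetic shift of the element width exists (i8, and i64 before
; AVX512's vpabsq), the sign mask is instead built with a pcmpgt against zero,
; or for i64 by splatting the high dword's sign via psrad $31 + pshufd.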