; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Codegen tests for the signed saturating add intrinsics (@llvm.sadd.sat.*)
; across three AMDGPU generations. The GFX6/GFX8/GFX9 check blocks below are
; autogenerated FileCheck directives; do not edit them by hand — rerun
; utils/update_llc_test_checks.py instead.

; Scalar i8: calls @llvm.sadd.sat.i8.
define i8 @v_saddsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-LABEL: v_saddsat_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_min_i32_e32 v0, 0x7f, v0
; GFX6-NEXT:    v_max_i32_e32 v0, 0xffffff80, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT:    v_min_i16_e32 v0, 0x7f, v0
; GFX8-NEXT:    v_max_i16_e32 v0, 0xff80, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
; GFX9-NEXT:    v_add_i16 v0, v0, v1 clamp
; GFX9-NEXT:    v_ashrrev_i16_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 @llvm.sadd.sat.i8(i8 %lhs, i8 %rhs)
  ret i8 %result
}

; Scalar i16: calls @llvm.sadd.sat.i16.
define i16 @v_saddsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-LABEL: v_saddsat_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_min_i32_e32 v0, 0x7fff, v0
; GFX6-NEXT:    v_max_i32_e32 v0, 0xffff8000, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v1
; GFX8-NEXT:    v_add_u16_e32 v1, v0, v1
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v1, v0
; GFX8-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[6:7], 0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_i16 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %result
}

; Scalar i32: calls @llvm.sadd.sat.i32.
define i32 @v_saddsat_i32(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_saddsat_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
; GFX6-NEXT:    v_add_i32_e64 v1, s[4:5], v0, v1
; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
; GFX6-NEXT:    v_bfrev_b32_e32 v0, 1
; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
; GFX6-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v1
; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[6:7]
; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v0, v1
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
; GFX8-NEXT:    v_bfrev_b32_e32 v0, 1
; GFX8-NEXT:    v_bfrev_b32_e32 v2, -2
; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_i32 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %result
}

; Packed <2 x i16>: calls @llvm.sadd.sat.v2i16.
define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: v_saddsat_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    s_movk_i32 s4, 0x7fff
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
; GFX6-NEXT:    s_movk_i32 s5, 0x8000
; GFX6-NEXT:    v_min_i32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
; GFX6-NEXT:    v_max_i32_e32 v0, s5, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT:    v_add_u16_e32 v4, v3, v2
; GFX8-NEXT:    v_mov_b32_e32 v5, 0xffff8000
; GFX8-NEXT:    v_mov_b32_e32 v6, 0x7fff
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v4
; GFX8-NEXT:    v_cndmask_b32_e32 v7, v5, v6, vcc
; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v4, v3
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[4:5], 0, v2
; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v7, vcc
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v1
; GFX8-NEXT:    v_add_u16_e32 v1, v0, v1
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v1, v0
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[6:7], 0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_i16 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
  ret <2 x i16> %result
}

; Odd-width vector <3 x i16>: calls @llvm.sadd.sat.v3i16.
define <3 x i16> @v_saddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-LABEL: v_saddsat_v3i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    s_movk_i32 s4, 0x7fff
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
; GFX6-NEXT:    s_movk_i32 s5, 0x8000
; GFX6-NEXT:    v_min_i32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
; GFX6-NEXT:    v_max_i32_e32 v0, s5, v0
; GFX6-NEXT:    v_min_i32_e32 v2, s4, v2
; GFX6-NEXT:    v_max_i32_e32 v3, s5, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_or_b32_e32 v2, 0xffff0000, v3
; GFX6-NEXT:    v_alignbit_b32 v1, v3, v1, 16
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v3i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT:    v_add_u16_e32 v6, v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v7, 0xffff8000
; GFX8-NEXT:    v_mov_b32_e32 v8, 0x7fff
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v6
; GFX8-NEXT:    v_cndmask_b32_e32 v9, v7, v8, vcc
; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v6, v5
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[4:5], 0, v4
; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v9, vcc
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v3
; GFX8-NEXT:    v_add_u16_e32 v3, v1, v3
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v3, v1
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[6:7], 0, v3
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v7, v8, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v2
; GFX8-NEXT:    v_add_u16_e32 v2, v0, v2
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v2, v0
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[6:7], 0, v2
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v3i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_i16 v1, v1, v3 clamp
; GFX9-NEXT:    v_pk_add_i16 v0, v0, v2 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
  ret <3 x i16> %result
}

; <4 x i16>: calls @llvm.sadd.sat.v4i16; the result is returned bitcast to
; <2 x float> so both 32-bit halves of the packed result are observable.
define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-LABEL: v_saddsat_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    s_movk_i32 s4, 0x7fff
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
; GFX6-NEXT:    s_movk_i32 s5, 0x8000
; GFX6-NEXT:    v_min_i32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
; GFX6-NEXT:    v_max_i32_e32 v0, s5, v0
; GFX6-NEXT:    s_mov_b32 s6, 0xffff
; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s6, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v3, v7
; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
; GFX6-NEXT:    v_min_i32_e32 v2, s4, v2
; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
; GFX6-NEXT:    v_max_i32_e32 v2, s5, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v2, s6, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT:    v_add_u16_e32 v6, v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v7, 0xffff8000
; GFX8-NEXT:    v_mov_b32_e32 v8, 0x7fff
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v6
; GFX8-NEXT:    v_cndmask_b32_e32 v9, v7, v8, vcc
; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v6, v5
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[4:5], 0, v4
; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v9, vcc
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v2
; GFX8-NEXT:    v_add_u16_e32 v2, v0, v2
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v2, v0
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[6:7], 0, v2
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_add_u16_e32 v5, v4, v2
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v5
; GFX8-NEXT:    v_cndmask_b32_e32 v6, v7, v8, vcc
; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v5, v4
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[4:5], 0, v2
; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v3
; GFX8-NEXT:    v_add_u16_e32 v3, v1, v3
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v3, v1
; GFX8-NEXT:    v_cmp_gt_i16_e64 s[6:7], 0, v3
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v7, v8, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_i16 v0, v0, v2 clamp
; GFX9-NEXT:    v_pk_add_i16 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

; <2 x i32>: calls @llvm.sadd.sat.v2i32.
define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-LABEL: v_saddsat_v2i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
; GFX6-NEXT:    v_add_i32_e64 v2, s[4:5], v0, v2
; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
; GFX6-NEXT:    v_bfrev_b32_e32 v4, 1
; GFX6-NEXT:    v_bfrev_b32_e32 v5, -2
; GFX6-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
; GFX6-NEXT:    v_cndmask_b32_e64 v0, v4, v5, s[6:7]
; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_add_i32_e64 v2, s[4:5], v1, v3
; GFX6-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v3
; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
; GFX6-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
; GFX6-NEXT:    v_cndmask_b32_e64 v1, v4, v5, s[6:7]
; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v0, v2
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
; GFX8-NEXT:    v_bfrev_b32_e32 v4, 1
; GFX8-NEXT:    v_bfrev_b32_e32 v5, -2
; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, v5, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v1, v3
; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v3
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v4, v5, s[6:7]
; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_i32 v0, v0, v2 clamp
; GFX9-NEXT:    v_add_i32 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %result
}

; Scalar i64: calls @llvm.sadd.sat.i64.
define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
; GFX6-LABEL: v_saddsat_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX8-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
; GFX8-NEXT:    v_bfrev_b32_e32 v2, -2
; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
; GFX9-NEXT:    v_bfrev_b32_e32 v2, -2
; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %result
}

; Intrinsic declarations exercised by the tests above.
declare i8 @llvm.sadd.sat.i8(i8, i8) #0
declare i16 @llvm.sadd.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare i64 @llvm.sadd.sat.i64(i64, i64) #0