; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s

; Test gfx9+ s_shl[1-4]_add_u32 pattern matching
;
; Every function below computes (%src0 << N) + %src1 for a constant N.
; The gfx900 run is checked with the GFX9 prefix and the fiji run with the
; GFX8 prefix. Functions whose expected output is the same for both runs
; are checked once with the shared GCN prefix.

; Shift by 1, both operands inreg. The gfx900 checks expect a single
; s_lshl1_add_u32. The fiji checks expect s_lshl_b32 followed by s_add_i32.
define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 2, both operands inreg. The gfx900 checks expect s_lshl2_add_u32.
define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 3, both operands inreg. The gfx900 checks expect s_lshl3_add_u32.
define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 4, both operands inreg. The gfx900 checks expect s_lshl4_add_u32.
define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 5, both operands inreg. Both runs share one set of checks, which
; expect a separate s_lshl_b32 and s_add_i32 rather than a combined
; instruction.
define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl5_add_u32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 5
; GCN-NEXT: s_add_i32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 1 with no inreg operands. The gfx900 checks expect a single
; v_lshl_add_u32 that takes the shift amount as an immediate operand. The
; fiji checks expect v_lshlrev_b32 followed by v_add_u32.
define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl1_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl1_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 2 with no inreg operands. Same expected shape as v_shl1_add_u32.
define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl2_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl2_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 3 with no inreg operands. Same expected shape as v_shl1_add_u32.
define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl3_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl3_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 4 with no inreg operands. Same expected shape as v_shl1_add_u32.
define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl4_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 4, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl4_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Shift by 5 with no inreg operands. In contrast to s_shl5_add_u32, the
; gfx900 checks here still expect a single v_lshl_add_u32.
define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl5_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 5, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl5_add_u32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; FIXME: Use v_lshl_add_u32
; shift is scalar, but add is vector.
; Shift by 1 where %src0 is inreg and %src1 is not. The checks for both runs
; expect s_lshl_b32 on s0 followed by a v_add_u32 writing v0, so no combined
; shift-add instruction is expected here. The i32 sum is bitcast to float
; for the return value.
define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl1_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl1_add_u32_vgpr1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Shift by 2 with inreg %src0 and non-inreg %src1. Same expected shape as
; shl1_add_u32_vgpr1.
define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl2_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl2_add_u32_vgpr1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Shift by 3 with inreg %src0 and non-inreg %src1. Same expected shape as
; shl1_add_u32_vgpr1.
define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl3_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 3
; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl3_add_u32_vgpr1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Shift by 4 with inreg %src0 and non-inreg %src1. Same expected shape as
; shl1_add_u32_vgpr1.
define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl4_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl4_add_u32_vgpr1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Shift by 5 with inreg %src0 and non-inreg %src1. Same expected shape as
; shl1_add_u32_vgpr1.
define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl5_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 5
; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl5_add_u32_vgpr1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 5
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; <2 x i32> form of the shift-by-1 case with both operands inreg. The gfx900
; checks expect one s_lshl1_add_u32 per element. The fiji checks expect two
; s_lshl_b32 followed by two s_add_i32.
define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s2
; GFX9-NEXT: s_lshl1_add_u32 s1, s1, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32_v2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_lshl_b32 s1, s1, 1
; GFX8-NEXT: s_add_i32 s0, s0, s2
; GFX8-NEXT: s_add_i32 s1, s1, s3
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 1, i32 1>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> form of the shift-by-2 case. The gfx900 checks expect one
; s_lshl2_add_u32 per element.
define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT: s_lshl2_add_u32 s1, s1, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32_v2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: s_lshl_b32 s1, s1, 2
; GFX8-NEXT: s_add_i32 s0, s0, s2
; GFX8-NEXT: s_add_i32 s1, s1, s3
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 2>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> form of the shift-by-3 case. The gfx900 checks expect one
; s_lshl3_add_u32 per element.
define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s2
; GFX9-NEXT: s_lshl3_add_u32 s1, s1, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32_v2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 3
; GFX8-NEXT: s_lshl_b32 s1, s1, 3
; GFX8-NEXT: s_add_i32 s0, s0, s2
; GFX8-NEXT: s_add_i32 s1, s1, s3
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 3, i32 3>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> form of the shift-by-4 case. The gfx900 checks expect one
; s_lshl4_add_u32 per element.
define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s2
; GFX9-NEXT: s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32_v2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_lshl_b32 s1, s1, 4
; GFX8-NEXT: s_add_i32 s0, s0, s2
; GFX8-NEXT: s_add_i32 s1, s1, s3
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 4, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> case where the two elements use different shift amounts (2 and
; 4). The gfx900 checks expect one s_lshl2_add_u32 and one s_lshl4_add_u32.
define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl_2_4_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT: s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl_2_4_add_u32_v2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: s_lshl_b32 s1, s1, 4
; GFX8-NEXT: s_add_i32 s0, s0, s2
; GFX8-NEXT: s_add_i32 s1, s1, s3
; GFX8-NEXT: ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; Shift by 4 where %shl has two uses, since it feeds %add and is also
; returned as field 0 of the result. Both runs share one set of checks, which
; expect a separate s_lshl_b32 and s_add_i32 rather than a combined
; instruction.
define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl4_add_u32_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 4
; GCN-NEXT: s_add_i32 s1, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

; Shift by 3 with %shl used twice. Same expected shape as
; s_shl4_add_u32_multi_use.
define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl3_add_u32_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 3
; GCN-NEXT: s_add_i32 s1, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

; Shift by 2 with %shl used twice. Same expected shape as
; s_shl4_add_u32_multi_use.
define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl2_add_u32_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: s_add_i32 s1, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}


; Shift by 1 with %shl used twice. Same expected shape as
; s_shl4_add_u32_multi_use.
define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl1_add_u32_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 1
; GCN-NEXT: s_add_i32 s1, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}