; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; FIXME: we should disable sdwa peephole because dead-code elimination, that
; runs after peephole, ruins this test (different register numbers)

; Spill all SGPRs so multiple VGPRs are required for spilling all of them.
;
; The kernel defines seventeen <8 x i32> SGPR tuples through inline asm in the
; entry block and consumes every one of them in %bb0, so all 136 values are
; live across the conditional branch on %in. The checks show each tuple being
; written to VGPR lanes right after its definition and read back in %bb0.

; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
; allocated per-frame index, so it's possible to end up with more.
; NOTE(review): the checks fill lanes 0-63 of v0 and v1 and lanes 0-7 of v2,
; i.e. three VGPRs for 136 values, so the "2 VGPRs" figure above looks
; stale -- confirm.
define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 {
; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 0
; GCN-NEXT:    v_writelane_b32 v0, s5, 1
; GCN-NEXT:    v_writelane_b32 v0, s6, 2
; GCN-NEXT:    v_writelane_b32 v0, s7, 3
; GCN-NEXT:    v_writelane_b32 v0, s8, 4
; GCN-NEXT:    v_writelane_b32 v0, s9, 5
; GCN-NEXT:    v_writelane_b32 v0, s10, 6
; GCN-NEXT:    v_writelane_b32 v0, s11, 7
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 8
; GCN-NEXT:    v_writelane_b32 v0, s5, 9
; GCN-NEXT:    v_writelane_b32 v0, s6, 10
; GCN-NEXT:    v_writelane_b32 v0, s7, 11
; GCN-NEXT:    v_writelane_b32 v0, s8, 12
; GCN-NEXT:    v_writelane_b32 v0, s9, 13
; GCN-NEXT:    v_writelane_b32 v0, s10, 14
; GCN-NEXT:    v_writelane_b32 v0, s11, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 16
; GCN-NEXT:    v_writelane_b32 v0, s5, 17
; GCN-NEXT:    v_writelane_b32 v0, s6, 18
; GCN-NEXT:    v_writelane_b32 v0, s7, 19
; GCN-NEXT:    v_writelane_b32 v0, s8, 20
; GCN-NEXT:    v_writelane_b32 v0, s9, 21
; GCN-NEXT:    v_writelane_b32 v0, s10, 22
; GCN-NEXT:    v_writelane_b32 v0, s11, 23
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 24
; GCN-NEXT:    v_writelane_b32 v0, s5, 25
; GCN-NEXT:    v_writelane_b32 v0, s6, 26
; GCN-NEXT:    v_writelane_b32 v0, s7, 27
; GCN-NEXT:    v_writelane_b32 v0, s8, 28
; GCN-NEXT:    v_writelane_b32 v0, s9, 29
; GCN-NEXT:    v_writelane_b32 v0, s10, 30
; GCN-NEXT:    v_writelane_b32 v0, s11, 31
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 32
; GCN-NEXT:    v_writelane_b32 v0, s5, 33
; GCN-NEXT:    v_writelane_b32 v0, s6, 34
; GCN-NEXT:    v_writelane_b32 v0, s7, 35
; GCN-NEXT:    v_writelane_b32 v0, s8, 36
; GCN-NEXT:    v_writelane_b32 v0, s9, 37
; GCN-NEXT:    v_writelane_b32 v0, s10, 38
; GCN-NEXT:    v_writelane_b32 v0, s11, 39
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 40
; GCN-NEXT:    v_writelane_b32 v0, s5, 41
; GCN-NEXT:    v_writelane_b32 v0, s6, 42
; GCN-NEXT:    v_writelane_b32 v0, s7, 43
; GCN-NEXT:    v_writelane_b32 v0, s8, 44
; GCN-NEXT:    v_writelane_b32 v0, s9, 45
; GCN-NEXT:    v_writelane_b32 v0, s10, 46
; GCN-NEXT:    v_writelane_b32 v0, s11, 47
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 48
; GCN-NEXT:    v_writelane_b32 v0, s5, 49
; GCN-NEXT:    v_writelane_b32 v0, s6, 50
; GCN-NEXT:    v_writelane_b32 v0, s7, 51
; GCN-NEXT:    v_writelane_b32 v0, s8, 52
; GCN-NEXT:    v_writelane_b32 v0, s9, 53
; GCN-NEXT:    v_writelane_b32 v0, s10, 54
; GCN-NEXT:    v_writelane_b32 v0, s11, 55
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 56
; GCN-NEXT:    v_writelane_b32 v0, s5, 57
; GCN-NEXT:    v_writelane_b32 v0, s6, 58
; GCN-NEXT:    v_writelane_b32 v0, s7, 59
; GCN-NEXT:    v_writelane_b32 v0, s8, 60
; GCN-NEXT:    v_writelane_b32 v0, s9, 61
; GCN-NEXT:    v_writelane_b32 v0, s10, 62
; GCN-NEXT:    v_writelane_b32 v0, s11, 63
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 0
; GCN-NEXT:    v_writelane_b32 v1, s5, 1
; GCN-NEXT:    v_writelane_b32 v1, s6, 2
; GCN-NEXT:    v_writelane_b32 v1, s7, 3
; GCN-NEXT:    v_writelane_b32 v1, s8, 4
; GCN-NEXT:    v_writelane_b32 v1, s9, 5
; GCN-NEXT:    v_writelane_b32 v1, s10, 6
; GCN-NEXT:    v_writelane_b32 v1, s11, 7
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 8
; GCN-NEXT:    v_writelane_b32 v1, s5, 9
; GCN-NEXT:    v_writelane_b32 v1, s6, 10
; GCN-NEXT:    v_writelane_b32 v1, s7, 11
; GCN-NEXT:    v_writelane_b32 v1, s8, 12
; GCN-NEXT:    v_writelane_b32 v1, s9, 13
; GCN-NEXT:    v_writelane_b32 v1, s10, 14
; GCN-NEXT:    v_writelane_b32 v1, s11, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 16
; GCN-NEXT:    v_writelane_b32 v1, s5, 17
; GCN-NEXT:    v_writelane_b32 v1, s6, 18
; GCN-NEXT:    v_writelane_b32 v1, s7, 19
; GCN-NEXT:    v_writelane_b32 v1, s8, 20
; GCN-NEXT:    v_writelane_b32 v1, s9, 21
; GCN-NEXT:    v_writelane_b32 v1, s10, 22
; GCN-NEXT:    v_writelane_b32 v1, s11, 23
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 24
; GCN-NEXT:    v_writelane_b32 v1, s5, 25
; GCN-NEXT:    v_writelane_b32 v1, s6, 26
; GCN-NEXT:    v_writelane_b32 v1, s7, 27
; GCN-NEXT:    v_writelane_b32 v1, s8, 28
; GCN-NEXT:    v_writelane_b32 v1, s9, 29
; GCN-NEXT:    v_writelane_b32 v1, s10, 30
; GCN-NEXT:    v_writelane_b32 v1, s11, 31
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 32
; GCN-NEXT:    v_writelane_b32 v1, s5, 33
; GCN-NEXT:    v_writelane_b32 v1, s6, 34
; GCN-NEXT:    v_writelane_b32 v1, s7, 35
; GCN-NEXT:    v_writelane_b32 v1, s8, 36
; GCN-NEXT:    v_writelane_b32 v1, s9, 37
; GCN-NEXT:    v_writelane_b32 v1, s10, 38
; GCN-NEXT:    v_writelane_b32 v1, s11, 39
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 40
; GCN-NEXT:    v_writelane_b32 v1, s5, 41
; GCN-NEXT:    v_writelane_b32 v1, s6, 42
; GCN-NEXT:    v_writelane_b32 v1, s7, 43
; GCN-NEXT:    v_writelane_b32 v1, s8, 44
; GCN-NEXT:    v_writelane_b32 v1, s9, 45
; GCN-NEXT:    v_writelane_b32 v1, s10, 46
; GCN-NEXT:    v_writelane_b32 v1, s11, 47
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 48
; GCN-NEXT:    v_writelane_b32 v1, s5, 49
; GCN-NEXT:    v_writelane_b32 v1, s6, 50
; GCN-NEXT:    v_writelane_b32 v1, s7, 51
; GCN-NEXT:    v_writelane_b32 v1, s8, 52
; GCN-NEXT:    v_writelane_b32 v1, s9, 53
; GCN-NEXT:    v_writelane_b32 v1, s10, 54
; GCN-NEXT:    v_writelane_b32 v1, s11, 55
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 56
; GCN-NEXT:    v_writelane_b32 v1, s5, 57
; GCN-NEXT:    v_writelane_b32 v1, s6, 58
; GCN-NEXT:    v_writelane_b32 v1, s7, 59
; GCN-NEXT:    v_writelane_b32 v1, s8, 60
; GCN-NEXT:    v_writelane_b32 v1, s9, 61
; GCN-NEXT:    v_writelane_b32 v1, s10, 62
; GCN-NEXT:    v_writelane_b32 v1, s11, 63
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v2, s4, 0
; GCN-NEXT:    v_writelane_b32 v2, s5, 1
; GCN-NEXT:    v_writelane_b32 v2, s6, 2
; GCN-NEXT:    v_writelane_b32 v2, s7, 3
; GCN-NEXT:    v_writelane_b32 v2, s8, 4
; GCN-NEXT:    v_writelane_b32 v2, s9, 5
; GCN-NEXT:    v_writelane_b32 v2, s10, 6
; GCN-NEXT:    v_writelane_b32 v2, s11, 7
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s0, s1
; GCN-NEXT:    s_cbranch_scc1 BB0_2
; GCN-NEXT:  ; %bb.1: ; %bb0
; GCN-NEXT:    v_readlane_b32 s8, v1, 56
; GCN-NEXT:    v_readlane_b32 s9, v1, 57
; GCN-NEXT:    v_readlane_b32 s10, v1, 58
; GCN-NEXT:    v_readlane_b32 s11, v1, 59
; GCN-NEXT:    v_readlane_b32 s12, v1, 60
; GCN-NEXT:    v_readlane_b32 s13, v1, 61
; GCN-NEXT:    v_readlane_b32 s14, v1, 62
; GCN-NEXT:    v_readlane_b32 s15, v1, 63
; GCN-NEXT:    v_readlane_b32 s16, v1, 48
; GCN-NEXT:    v_readlane_b32 s17, v1, 49
; GCN-NEXT:    v_readlane_b32 s18, v1, 50
; GCN-NEXT:    v_readlane_b32 s19, v1, 51
; GCN-NEXT:    v_readlane_b32 s20, v1, 52
; GCN-NEXT:    v_readlane_b32 s21, v1, 53
; GCN-NEXT:    v_readlane_b32 s22, v1, 54
; GCN-NEXT:    v_readlane_b32 s23, v1, 55
; GCN-NEXT:    v_readlane_b32 s24, v1, 40
; GCN-NEXT:    v_readlane_b32 s25, v1, 41
; GCN-NEXT:    v_readlane_b32 s26, v1, 42
; GCN-NEXT:    v_readlane_b32 s27, v1, 43
; GCN-NEXT:    v_readlane_b32 s28, v1, 44
; GCN-NEXT:    v_readlane_b32 s29, v1, 45
; GCN-NEXT:    v_readlane_b32 s30, v1, 46
; GCN-NEXT:    v_readlane_b32 s31, v1, 47
; GCN-NEXT:    v_readlane_b32 s36, v1, 32
; GCN-NEXT:    v_readlane_b32 s37, v1, 33
; GCN-NEXT:    v_readlane_b32 s38, v1, 34
; GCN-NEXT:    v_readlane_b32 s39, v1, 35
; GCN-NEXT:    v_readlane_b32 s40, v1, 36
; GCN-NEXT:    v_readlane_b32 s41, v1, 37
; GCN-NEXT:    v_readlane_b32 s42, v1, 38
; GCN-NEXT:    v_readlane_b32 s43, v1, 39
; GCN-NEXT:    v_readlane_b32 s44, v1, 24
; GCN-NEXT:    v_readlane_b32 s45, v1, 25
; GCN-NEXT:    v_readlane_b32 s46, v1, 26
; GCN-NEXT:    v_readlane_b32 s47, v1, 27
; GCN-NEXT:    v_readlane_b32 s48, v1, 28
; GCN-NEXT:    v_readlane_b32 s49, v1, 29
; GCN-NEXT:    v_readlane_b32 s50, v1, 30
; GCN-NEXT:    v_readlane_b32 s51, v1, 31
; GCN-NEXT:    v_readlane_b32 s52, v1, 16
; GCN-NEXT:    v_readlane_b32 s53, v1, 17
; GCN-NEXT:    v_readlane_b32 s54, v1, 18
; GCN-NEXT:    v_readlane_b32 s55, v1, 19
; GCN-NEXT:    v_readlane_b32 s56, v1, 20
; GCN-NEXT:    v_readlane_b32 s57, v1, 21
; GCN-NEXT:    v_readlane_b32 s58, v1, 22
; GCN-NEXT:    v_readlane_b32 s59, v1, 23
; GCN-NEXT:    v_readlane_b32 s60, v1, 8
; GCN-NEXT:    v_readlane_b32 s61, v1, 9
; GCN-NEXT:    v_readlane_b32 s62, v1, 10
; GCN-NEXT:    v_readlane_b32 s63, v1, 11
; GCN-NEXT:    v_readlane_b32 s64, v1, 12
; GCN-NEXT:    v_readlane_b32 s65, v1, 13
; GCN-NEXT:    v_readlane_b32 s66, v1, 14
; GCN-NEXT:    v_readlane_b32 s67, v1, 15
; GCN-NEXT:    v_readlane_b32 s68, v1, 0
; GCN-NEXT:    v_readlane_b32 s69, v1, 1
; GCN-NEXT:    v_readlane_b32 s70, v1, 2
; GCN-NEXT:    v_readlane_b32 s71, v1, 3
; GCN-NEXT:    v_readlane_b32 s72, v1, 4
; GCN-NEXT:    v_readlane_b32 s73, v1, 5
; GCN-NEXT:    v_readlane_b32 s74, v1, 6
; GCN-NEXT:    v_readlane_b32 s75, v1, 7
; GCN-NEXT:    v_readlane_b32 s76, v0, 56
; GCN-NEXT:    v_readlane_b32 s77, v0, 57
; GCN-NEXT:    v_readlane_b32 s78, v0, 58
; GCN-NEXT:    v_readlane_b32 s79, v0, 59
; GCN-NEXT:    v_readlane_b32 s80, v0, 60
; GCN-NEXT:    v_readlane_b32 s81, v0, 61
; GCN-NEXT:    v_readlane_b32 s82, v0, 62
; GCN-NEXT:    v_readlane_b32 s83, v0, 63
; GCN-NEXT:    v_readlane_b32 s84, v0, 48
; GCN-NEXT:    v_readlane_b32 s85, v0, 49
; GCN-NEXT:    v_readlane_b32 s86, v0, 50
; GCN-NEXT:    v_readlane_b32 s87, v0, 51
; GCN-NEXT:    v_readlane_b32 s88, v0, 52
; GCN-NEXT:    v_readlane_b32 s89, v0, 53
; GCN-NEXT:    v_readlane_b32 s90, v0, 54
; GCN-NEXT:    v_readlane_b32 s91, v0, 55
; GCN-NEXT:    v_readlane_b32 s0, v0, 0
; GCN-NEXT:    v_readlane_b32 s1, v0, 1
; GCN-NEXT:    v_readlane_b32 s2, v0, 2
; GCN-NEXT:    v_readlane_b32 s3, v0, 3
; GCN-NEXT:    v_readlane_b32 s4, v0, 4
; GCN-NEXT:    v_readlane_b32 s5, v0, 5
; GCN-NEXT:    v_readlane_b32 s6, v0, 6
; GCN-NEXT:    v_readlane_b32 s7, v0, 7
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 8
; GCN-NEXT:    v_readlane_b32 s1, v0, 9
; GCN-NEXT:    v_readlane_b32 s2, v0, 10
; GCN-NEXT:    v_readlane_b32 s3, v0, 11
; GCN-NEXT:    v_readlane_b32 s4, v0, 12
; GCN-NEXT:    v_readlane_b32 s5, v0, 13
; GCN-NEXT:    v_readlane_b32 s6, v0, 14
; GCN-NEXT:    v_readlane_b32 s7, v0, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 16
; GCN-NEXT:    v_readlane_b32 s1, v0, 17
; GCN-NEXT:    v_readlane_b32 s2, v0, 18
; GCN-NEXT:    v_readlane_b32 s3, v0, 19
; GCN-NEXT:    v_readlane_b32 s4, v0, 20
; GCN-NEXT:    v_readlane_b32 s5, v0, 21
; GCN-NEXT:    v_readlane_b32 s6, v0, 22
; GCN-NEXT:    v_readlane_b32 s7, v0, 23
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 24
; GCN-NEXT:    v_readlane_b32 s1, v0, 25
; GCN-NEXT:    v_readlane_b32 s2, v0, 26
; GCN-NEXT:    v_readlane_b32 s3, v0, 27
; GCN-NEXT:    v_readlane_b32 s4, v0, 28
; GCN-NEXT:    v_readlane_b32 s5, v0, 29
; GCN-NEXT:    v_readlane_b32 s6, v0, 30
; GCN-NEXT:    v_readlane_b32 s7, v0, 31
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 32
; GCN-NEXT:    v_readlane_b32 s1, v0, 33
; GCN-NEXT:    v_readlane_b32 s2, v0, 34
; GCN-NEXT:    v_readlane_b32 s3, v0, 35
; GCN-NEXT:    v_readlane_b32 s4, v0, 36
; GCN-NEXT:    v_readlane_b32 s5, v0, 37
; GCN-NEXT:    v_readlane_b32 s6, v0, 38
; GCN-NEXT:    v_readlane_b32 s7, v0, 39
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 40
; GCN-NEXT:    v_readlane_b32 s1, v0, 41
; GCN-NEXT:    v_readlane_b32 s2, v0, 42
; GCN-NEXT:    v_readlane_b32 s3, v0, 43
; GCN-NEXT:    v_readlane_b32 s4, v0, 44
; GCN-NEXT:    v_readlane_b32 s5, v0, 45
; GCN-NEXT:    v_readlane_b32 s6, v0, 46
; GCN-NEXT:    v_readlane_b32 s7, v0, 47
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v2, 0
; GCN-NEXT:    v_readlane_b32 s1, v2, 1
; GCN-NEXT:    v_readlane_b32 s2, v2, 2
; GCN-NEXT:    v_readlane_b32 s3, v2, 3
; GCN-NEXT:    v_readlane_b32 s4, v2, 4
; GCN-NEXT:    v_readlane_b32 s5, v2, 5
; GCN-NEXT:    v_readlane_b32 s6, v2, 6
; GCN-NEXT:    v_readlane_b32 s7, v2, 7
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[84:91]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[76:83]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[68:75]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[60:67]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[52:59]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[44:51]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[36:43]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[24:31]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[16:23]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[8:15]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:7]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:  BB0_2: ; %ret
; GCN-NEXT:    s_endpgm
  %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
  br label %ret

ret:
  ret void
}

; Some of the lanes of an SGPR spill are in one VGPR and some forced
; into the next available VGPR.
; Four <16 x i32> tuples, one <8 x i32> tuple and one <2 x i32> tuple are
; defined before the branch on %in and used in %bb0. Per the checks, the four
; 16-wide tuples occupy lanes 0-63 of v0 and the remaining ten values occupy
; lanes 0-9 of v1.
; NOTE(review): in these checks no single tuple straddles v0 and v1 -- confirm
; the test still exercises the split-spill case it was written for.
define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: split_sgpr_spill_2_vgprs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 0
; GCN-NEXT:    v_writelane_b32 v0, s5, 1
; GCN-NEXT:    v_writelane_b32 v0, s6, 2
; GCN-NEXT:    v_writelane_b32 v0, s7, 3
; GCN-NEXT:    v_writelane_b32 v0, s8, 4
; GCN-NEXT:    v_writelane_b32 v0, s9, 5
; GCN-NEXT:    v_writelane_b32 v0, s10, 6
; GCN-NEXT:    v_writelane_b32 v0, s11, 7
; GCN-NEXT:    v_writelane_b32 v0, s12, 8
; GCN-NEXT:    v_writelane_b32 v0, s13, 9
; GCN-NEXT:    v_writelane_b32 v0, s14, 10
; GCN-NEXT:    v_writelane_b32 v0, s15, 11
; GCN-NEXT:    v_writelane_b32 v0, s16, 12
; GCN-NEXT:    v_writelane_b32 v0, s17, 13
; GCN-NEXT:    v_writelane_b32 v0, s18, 14
; GCN-NEXT:    v_writelane_b32 v0, s19, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 16
; GCN-NEXT:    v_writelane_b32 v0, s5, 17
; GCN-NEXT:    v_writelane_b32 v0, s6, 18
; GCN-NEXT:    v_writelane_b32 v0, s7, 19
; GCN-NEXT:    v_writelane_b32 v0, s8, 20
; GCN-NEXT:    v_writelane_b32 v0, s9, 21
; GCN-NEXT:    v_writelane_b32 v0, s10, 22
; GCN-NEXT:    v_writelane_b32 v0, s11, 23
; GCN-NEXT:    v_writelane_b32 v0, s12, 24
; GCN-NEXT:    v_writelane_b32 v0, s13, 25
; GCN-NEXT:    v_writelane_b32 v0, s14, 26
; GCN-NEXT:    v_writelane_b32 v0, s15, 27
; GCN-NEXT:    v_writelane_b32 v0, s16, 28
; GCN-NEXT:    v_writelane_b32 v0, s17, 29
; GCN-NEXT:    v_writelane_b32 v0, s18, 30
; GCN-NEXT:    v_writelane_b32 v0, s19, 31
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 32
; GCN-NEXT:    v_writelane_b32 v0, s5, 33
; GCN-NEXT:    v_writelane_b32 v0, s6, 34
; GCN-NEXT:    v_writelane_b32 v0, s7, 35
; GCN-NEXT:    v_writelane_b32 v0, s8, 36
; GCN-NEXT:    v_writelane_b32 v0, s9, 37
; GCN-NEXT:    v_writelane_b32 v0, s10, 38
; GCN-NEXT:    v_writelane_b32 v0, s11, 39
; GCN-NEXT:    v_writelane_b32 v0, s12, 40
; GCN-NEXT:    v_writelane_b32 v0, s13, 41
; GCN-NEXT:    v_writelane_b32 v0, s14, 42
; GCN-NEXT:    v_writelane_b32 v0, s15, 43
; GCN-NEXT:    v_writelane_b32 v0, s16, 44
; GCN-NEXT:    v_writelane_b32 v0, s17, 45
; GCN-NEXT:    v_writelane_b32 v0, s18, 46
; GCN-NEXT:    v_writelane_b32 v0, s19, 47
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s4, 48
; GCN-NEXT:    v_writelane_b32 v0, s5, 49
; GCN-NEXT:    v_writelane_b32 v0, s6, 50
; GCN-NEXT:    v_writelane_b32 v0, s7, 51
; GCN-NEXT:    v_writelane_b32 v0, s8, 52
; GCN-NEXT:    v_writelane_b32 v0, s9, 53
; GCN-NEXT:    v_writelane_b32 v0, s10, 54
; GCN-NEXT:    v_writelane_b32 v0, s11, 55
; GCN-NEXT:    v_writelane_b32 v0, s12, 56
; GCN-NEXT:    v_writelane_b32 v0, s13, 57
; GCN-NEXT:    v_writelane_b32 v0, s14, 58
; GCN-NEXT:    v_writelane_b32 v0, s15, 59
; GCN-NEXT:    v_writelane_b32 v0, s16, 60
; GCN-NEXT:    v_writelane_b32 v0, s17, 61
; GCN-NEXT:    v_writelane_b32 v0, s18, 62
; GCN-NEXT:    v_writelane_b32 v0, s19, 63
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:11]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s4, 0
; GCN-NEXT:    v_writelane_b32 v1, s5, 1
; GCN-NEXT:    v_writelane_b32 v1, s6, 2
; GCN-NEXT:    v_writelane_b32 v1, s7, 3
; GCN-NEXT:    v_writelane_b32 v1, s8, 4
; GCN-NEXT:    v_writelane_b32 v1, s9, 5
; GCN-NEXT:    v_writelane_b32 v1, s10, 6
; GCN-NEXT:    v_writelane_b32 v1, s11, 7
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[2:3]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v1, s2, 8
; GCN-NEXT:    v_writelane_b32 v1, s3, 9
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s0, s1
; GCN-NEXT:    s_cbranch_scc1 BB1_2
; GCN-NEXT:  ; %bb.1: ; %bb0
; GCN-NEXT:    v_readlane_b32 s16, v1, 8
; GCN-NEXT:    v_readlane_b32 s17, v1, 9
; GCN-NEXT:    v_readlane_b32 s20, v1, 0
; GCN-NEXT:    v_readlane_b32 s21, v1, 1
; GCN-NEXT:    v_readlane_b32 s22, v1, 2
; GCN-NEXT:    v_readlane_b32 s23, v1, 3
; GCN-NEXT:    v_readlane_b32 s24, v1, 4
; GCN-NEXT:    v_readlane_b32 s25, v1, 5
; GCN-NEXT:    v_readlane_b32 s26, v1, 6
; GCN-NEXT:    v_readlane_b32 s27, v1, 7
; GCN-NEXT:    v_readlane_b32 s36, v0, 32
; GCN-NEXT:    v_readlane_b32 s37, v0, 33
; GCN-NEXT:    v_readlane_b32 s38, v0, 34
; GCN-NEXT:    v_readlane_b32 s39, v0, 35
; GCN-NEXT:    v_readlane_b32 s40, v0, 36
; GCN-NEXT:    v_readlane_b32 s41, v0, 37
; GCN-NEXT:    v_readlane_b32 s42, v0, 38
; GCN-NEXT:    v_readlane_b32 s43, v0, 39
; GCN-NEXT:    v_readlane_b32 s44, v0, 40
; GCN-NEXT:    v_readlane_b32 s45, v0, 41
; GCN-NEXT:    v_readlane_b32 s46, v0, 42
; GCN-NEXT:    v_readlane_b32 s47, v0, 43
; GCN-NEXT:    v_readlane_b32 s48, v0, 44
; GCN-NEXT:    v_readlane_b32 s49, v0, 45
; GCN-NEXT:    v_readlane_b32 s50, v0, 46
; GCN-NEXT:    v_readlane_b32 s51, v0, 47
; GCN-NEXT:    v_readlane_b32 s0, v0, 0
; GCN-NEXT:    v_readlane_b32 s1, v0, 1
; GCN-NEXT:    v_readlane_b32 s2, v0, 2
; GCN-NEXT:    v_readlane_b32 s3, v0, 3
; GCN-NEXT:    v_readlane_b32 s4, v0, 4
; GCN-NEXT:    v_readlane_b32 s5, v0, 5
; GCN-NEXT:    v_readlane_b32 s6, v0, 6
; GCN-NEXT:    v_readlane_b32 s7, v0, 7
; GCN-NEXT:    v_readlane_b32 s8, v0, 8
; GCN-NEXT:    v_readlane_b32 s9, v0, 9
; GCN-NEXT:    v_readlane_b32 s10, v0, 10
; GCN-NEXT:    v_readlane_b32 s11, v0, 11
; GCN-NEXT:    v_readlane_b32 s12, v0, 12
; GCN-NEXT:    v_readlane_b32 s13, v0, 13
; GCN-NEXT:    v_readlane_b32 s14, v0, 14
; GCN-NEXT:    v_readlane_b32 s15, v0, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:15]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 16
; GCN-NEXT:    v_readlane_b32 s1, v0, 17
; GCN-NEXT:    v_readlane_b32 s2, v0, 18
; GCN-NEXT:    v_readlane_b32 s3, v0, 19
; GCN-NEXT:    v_readlane_b32 s4, v0, 20
; GCN-NEXT:    v_readlane_b32 s5, v0, 21
; GCN-NEXT:    v_readlane_b32 s6, v0, 22
; GCN-NEXT:    v_readlane_b32 s7, v0, 23
; GCN-NEXT:    v_readlane_b32 s8, v0, 24
; GCN-NEXT:    v_readlane_b32 s9, v0, 25
; GCN-NEXT:    v_readlane_b32 s10, v0, 26
; GCN-NEXT:    v_readlane_b32 s11, v0, 27
; GCN-NEXT:    v_readlane_b32 s12, v0, 28
; GCN-NEXT:    v_readlane_b32 s13, v0, 29
; GCN-NEXT:    v_readlane_b32 s14, v0, 30
; GCN-NEXT:    v_readlane_b32 s15, v0, 31
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:15]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 48
; GCN-NEXT:    v_readlane_b32 s1, v0, 49
; GCN-NEXT:    v_readlane_b32 s2, v0, 50
; GCN-NEXT:    v_readlane_b32 s3, v0, 51
; GCN-NEXT:    v_readlane_b32 s4, v0, 52
; GCN-NEXT:    v_readlane_b32 s5, v0, 53
; GCN-NEXT:    v_readlane_b32 s6, v0, 54
; GCN-NEXT:    v_readlane_b32 s7, v0, 55
; GCN-NEXT:    v_readlane_b32 s8, v0, 56
; GCN-NEXT:    v_readlane_b32 s9, v0, 57
; GCN-NEXT:    v_readlane_b32 s10, v0, 58
; GCN-NEXT:    v_readlane_b32 s11, v0, 59
; GCN-NEXT:    v_readlane_b32 s12, v0, 60
; GCN-NEXT:    v_readlane_b32 s13, v0, 61
; GCN-NEXT:    v_readlane_b32 s14, v0, 62
; GCN-NEXT:    v_readlane_b32 s15, v0, 63
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[36:51]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[20:27]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[16:17]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:15]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:  BB1_2: ; %ret
; GCN-NEXT:    s_endpgm
  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0

  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
  br label %ret

ret:
  ret void
}

; The first 64 SGPR spills can go to a VGPR, but there isn't a second
; so some spills must be to memory. The last 16 element spill runs out
; of lanes at the 15th element.
; NOTE(review): in the checks below all four 16-element tuples fit in lanes
; 0-63 of v31, and only the <2 x i32> tuple is written to v0 and then stored
; to scratch with buffer_store_dword, so the last sentence above looks
; stale -- confirm.
;
; The empty inline asm statements at the top of the body clobber v0 through
; v30; the checks then use v31 for the lane spills.
define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: no_vgprs_last_sgpr_spill:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
; GCN-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
; GCN-NEXT:    s_mov_b32 s54, -1
; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
; GCN-NEXT:    s_add_u32 s52, s52, s3
; GCN-NEXT:    s_addc_u32 s53, s53, 0
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v31, s4, 0
; GCN-NEXT:    v_writelane_b32 v31, s5, 1
; GCN-NEXT:    v_writelane_b32 v31, s6, 2
; GCN-NEXT:    v_writelane_b32 v31, s7, 3
; GCN-NEXT:    v_writelane_b32 v31, s8, 4
; GCN-NEXT:    v_writelane_b32 v31, s9, 5
; GCN-NEXT:    v_writelane_b32 v31, s10, 6
; GCN-NEXT:    v_writelane_b32 v31, s11, 7
; GCN-NEXT:    v_writelane_b32 v31, s12, 8
; GCN-NEXT:    v_writelane_b32 v31, s13, 9
; GCN-NEXT:    v_writelane_b32 v31, s14, 10
; GCN-NEXT:    v_writelane_b32 v31, s15, 11
; GCN-NEXT:    v_writelane_b32 v31, s16, 12
; GCN-NEXT:    v_writelane_b32 v31, s17, 13
; GCN-NEXT:    v_writelane_b32 v31, s18, 14
; GCN-NEXT:    v_writelane_b32 v31, s19, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v31, s4, 16
; GCN-NEXT:    v_writelane_b32 v31, s5, 17
; GCN-NEXT:    v_writelane_b32 v31, s6, 18
; GCN-NEXT:    v_writelane_b32 v31, s7, 19
; GCN-NEXT:    v_writelane_b32 v31, s8, 20
; GCN-NEXT:    v_writelane_b32 v31, s9, 21
; GCN-NEXT:    v_writelane_b32 v31, s10, 22
; GCN-NEXT:    v_writelane_b32 v31, s11, 23
; GCN-NEXT:    v_writelane_b32 v31, s12, 24
; GCN-NEXT:    v_writelane_b32 v31, s13, 25
; GCN-NEXT:    v_writelane_b32 v31, s14, 26
; GCN-NEXT:    v_writelane_b32 v31, s15, 27
; GCN-NEXT:    v_writelane_b32 v31, s16, 28
; GCN-NEXT:    v_writelane_b32 v31, s17, 29
; GCN-NEXT:    v_writelane_b32 v31, s18, 30
; GCN-NEXT:    v_writelane_b32 v31, s19, 31
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v31, s4, 32
; GCN-NEXT:    v_writelane_b32 v31, s5, 33
; GCN-NEXT:    v_writelane_b32 v31, s6, 34
; GCN-NEXT:    v_writelane_b32 v31, s7, 35
; GCN-NEXT:    v_writelane_b32 v31, s8, 36
; GCN-NEXT:    v_writelane_b32 v31, s9, 37
; GCN-NEXT:    v_writelane_b32 v31, s10, 38
; GCN-NEXT:    v_writelane_b32 v31, s11, 39
; GCN-NEXT:    v_writelane_b32 v31, s12, 40
; GCN-NEXT:    v_writelane_b32 v31, s13, 41
; GCN-NEXT:    v_writelane_b32 v31, s14, 42
; GCN-NEXT:    v_writelane_b32 v31, s15, 43
; GCN-NEXT:    v_writelane_b32 v31, s16, 44
; GCN-NEXT:    v_writelane_b32 v31, s17, 45
; GCN-NEXT:    v_writelane_b32 v31, s18, 46
; GCN-NEXT:    v_writelane_b32 v31, s19, 47
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v31, s4, 48
; GCN-NEXT:    v_writelane_b32 v31, s5, 49
; GCN-NEXT:    v_writelane_b32 v31, s6, 50
; GCN-NEXT:    v_writelane_b32 v31, s7, 51
; GCN-NEXT:    v_writelane_b32 v31, s8, 52
; GCN-NEXT:    v_writelane_b32 v31, s9, 53
; GCN-NEXT:    v_writelane_b32 v31, s10, 54
; GCN-NEXT:    v_writelane_b32 v31, s11, 55
; GCN-NEXT:    v_writelane_b32 v31, s12, 56
; GCN-NEXT:    v_writelane_b32 v31, s13, 57
; GCN-NEXT:    v_writelane_b32 v31, s14, 58
; GCN-NEXT:    v_writelane_b32 v31, s15, 59
; GCN-NEXT:    v_writelane_b32 v31, s16, 60
; GCN-NEXT:    v_writelane_b32 v31, s17, 61
; GCN-NEXT:    v_writelane_b32 v31, s18, 62
; GCN-NEXT:    v_writelane_b32 v31, s19, 63
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; def s[2:3]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_writelane_b32 v0, s2, 0
; GCN-NEXT:    v_writelane_b32 v0, s3, 1
; GCN-NEXT:    s_mov_b64 s[2:3], exec
; GCN-NEXT:    s_mov_b64 exec, 3
; GCN-NEXT:    buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT:    s_mov_b64 exec, s[2:3]
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s0, s1
; GCN-NEXT:    s_cbranch_scc1 BB2_2
; GCN-NEXT:  ; %bb.1: ; %bb0
; GCN-NEXT:    v_readlane_b32 s36, v31, 32
; GCN-NEXT:    v_readlane_b32 s37, v31, 33
; GCN-NEXT:    v_readlane_b32 s38, v31, 34
; GCN-NEXT:    v_readlane_b32 s39, v31, 35
; GCN-NEXT:    v_readlane_b32 s40, v31, 36
; GCN-NEXT:    v_readlane_b32 s41, v31, 37
; GCN-NEXT:    v_readlane_b32 s42, v31, 38
; GCN-NEXT:    v_readlane_b32 s43, v31, 39
; GCN-NEXT:    v_readlane_b32 s44, v31, 40
; GCN-NEXT:    v_readlane_b32 s45, v31, 41
; GCN-NEXT:    v_readlane_b32 s46, v31, 42
; GCN-NEXT:    v_readlane_b32 s47, v31, 43
; GCN-NEXT:    v_readlane_b32 s48, v31, 44
; GCN-NEXT:    v_readlane_b32 s49, v31, 45
; GCN-NEXT:    v_readlane_b32 s50, v31, 46
; GCN-NEXT:    v_readlane_b32 s51, v31, 47
; GCN-NEXT:    v_readlane_b32 s0, v31, 16
; GCN-NEXT:    v_readlane_b32 s1, v31, 17
; GCN-NEXT:    v_readlane_b32 s2, v31, 18
; GCN-NEXT:    v_readlane_b32 s3, v31, 19
; GCN-NEXT:    v_readlane_b32 s4, v31, 20
; GCN-NEXT:    v_readlane_b32 s5, v31, 21
; GCN-NEXT:    v_readlane_b32 s6, v31, 22
; GCN-NEXT:    v_readlane_b32 s7, v31, 23
; GCN-NEXT:    v_readlane_b32 s8, v31, 24
; GCN-NEXT:    v_readlane_b32 s9, v31, 25
; GCN-NEXT:    v_readlane_b32 s10, v31, 26
; GCN-NEXT:    v_readlane_b32 s11, v31, 27
; GCN-NEXT:    v_readlane_b32 s12, v31, 28
; GCN-NEXT:    v_readlane_b32 s13, v31, 29
; GCN-NEXT:    v_readlane_b32 s14, v31, 30
; GCN-NEXT:    v_readlane_b32 s15, v31, 31
; GCN-NEXT:    v_readlane_b32 s16, v31, 0
; GCN-NEXT:    v_readlane_b32 s17, v31, 1
; GCN-NEXT:    v_readlane_b32 s18, v31, 2
; GCN-NEXT:    v_readlane_b32 s19, v31, 3
; GCN-NEXT:    v_readlane_b32 s20, v31, 4
; GCN-NEXT:    v_readlane_b32 s21, v31, 5
; GCN-NEXT:    v_readlane_b32 s22, v31, 6
; GCN-NEXT:    v_readlane_b32 s23, v31, 7
; GCN-NEXT:    v_readlane_b32 s24, v31, 8
; GCN-NEXT:    v_readlane_b32 s25, v31, 9
; GCN-NEXT:    v_readlane_b32 s26, v31, 10
; GCN-NEXT:    v_readlane_b32 s27, v31, 11
; GCN-NEXT:    v_readlane_b32 s28, v31, 12
; GCN-NEXT:    v_readlane_b32 s29, v31, 13
; GCN-NEXT:    v_readlane_b32 s30, v31, 14
; GCN-NEXT:    v_readlane_b32 s31, v31, 15
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[16:31]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:15]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s4, v31, 48
; GCN-NEXT:    v_readlane_b32 s5, v31, 49
; GCN-NEXT:    v_readlane_b32 s6, v31, 50
; GCN-NEXT:    v_readlane_b32 s7, v31, 51
; GCN-NEXT:    v_readlane_b32 s8, v31, 52
; GCN-NEXT:    v_readlane_b32 s9, v31, 53
; GCN-NEXT:    v_readlane_b32 s10, v31, 54
; GCN-NEXT:    v_readlane_b32 s11, v31, 55
; GCN-NEXT:    v_readlane_b32 s12, v31, 56
; GCN-NEXT:    v_readlane_b32 s13, v31, 57
; GCN-NEXT:    v_readlane_b32 s14, v31, 58
; GCN-NEXT:    v_readlane_b32 s15, v31, 59
; GCN-NEXT:    v_readlane_b32 s16, v31, 60
; GCN-NEXT:    v_readlane_b32 s17, v31, 61
; GCN-NEXT:    v_readlane_b32 s18, v31, 62
; GCN-NEXT:    v_readlane_b32 s19, v31, 63
; GCN-NEXT:    s_mov_b64 s[0:1], exec
; GCN-NEXT:    s_mov_b64 exec, 3
; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT:    s_mov_b64 exec, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_readlane_b32 s0, v0, 0
; GCN-NEXT:    v_readlane_b32 s1, v0, 1
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[36:51]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[4:19]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s[0:1]
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:  BB2_2: ; %ret
; GCN-NEXT:    s_endpgm
  call void asm sideeffect "", "~{v[0:7]}" () #0
  call void asm sideeffect "", "~{v[8:15]}" () #0
  call void asm sideeffect "", "~{v[16:23]}" () #0
  call void asm sideeffect "", "~{v[24:27]}"() #0
  call void asm sideeffect "", "~{v[28:29]}"() #0
  call void asm sideeffect "", "~{v30}"() #0

  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
  br label %ret

ret:
  ret void
}

; #0 is applied to the first kernel and to every inline asm call site; #1 is
; applied to the other two kernels and additionally sets amdgpu-waves-per-eu
; to "8,8".
attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }