; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.
;
; The same IR is compiled once for fiji and once for gfx900; each run is
; matched against its own check prefix. Every kernel below stores the constant
; 9 to one private (addrspace(5)) i32 alloca, and the expected output is pinned
; as an unbroken -NEXT chain from %bb.0 through s_endpgm and on through the
; kernel descriptor directives, so any additional instruction emitted between
; the entry setup and the store would make the test fail.

; Case 1 - the alloca itself asks for 128-byte alignment (attribute group #0,
; plain nounwind). The expected store uses an immediate offset:128 and the
; expected .amdhsa_private_segment_fixed_size is 256.
define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: s_add_u32 s0, s0, s7
; VI-NEXT: s_addc_u32 s1, s1, 0
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
; VI-NEXT: s_endpgm
; VI-NEXT: .section .rodata,#alloc
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel max_alignment_128
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT: .amdhsa_next_free_vgpr 1
; VI-NEXT: .amdhsa_next_free_sgpr 8
; VI-NEXT: .amdhsa_reserve_vcc 0
; VI-NEXT: .amdhsa_float_round_mode_32 0
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
; VI-NEXT: .amdhsa_float_denorm_mode_32 3
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT: .amdhsa_dx10_clamp 1
; VI-NEXT: .amdhsa_ieee_mode 1
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT: .amdhsa_exception_int_div_zero 0
; VI-NEXT: .end_amdhsa_kernel
; VI-NEXT: .text
;
; GFX9-LABEL: max_alignment_128:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT: s_add_u32 s0, s0, s7
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .section .rodata,#alloc
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT: .amdhsa_dx10_clamp 1
; GFX9-NEXT: .amdhsa_ieee_mode 1
; GFX9-NEXT: .amdhsa_fp16_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
; GFX9-NEXT: .end_amdhsa_kernel
; GFX9-NEXT: .text
  %alloca.align = alloca i32, align 128, addrspace(5)
  ; volatile keeps the otherwise-unused store (and thus the stack object) alive.
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
  ret void
}

; Case 2 - the alloca is only 4-byte aligned, but the kernel carries the
; "stackrealign" string attribute (attribute group #1). The expected store uses
; offset:4 and the expected .amdhsa_private_segment_fixed_size is 8.
define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: s_add_u32 s0, s0, s7
; VI-NEXT: s_addc_u32 s1, s1, 0
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT: s_endpgm
; VI-NEXT: .section .rodata,#alloc
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel stackrealign_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 8
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT: .amdhsa_next_free_vgpr 1
; VI-NEXT: .amdhsa_next_free_sgpr 8
; VI-NEXT: .amdhsa_reserve_vcc 0
; VI-NEXT: .amdhsa_float_round_mode_32 0
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
; VI-NEXT: .amdhsa_float_denorm_mode_32 3
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT: .amdhsa_dx10_clamp 1
; VI-NEXT: .amdhsa_ieee_mode 1
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT: .amdhsa_exception_int_div_zero 0
; VI-NEXT: .end_amdhsa_kernel
; VI-NEXT: .text
;
; GFX9-LABEL: stackrealign_attr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT: s_add_u32 s0, s0, s7
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .section .rodata,#alloc
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT: .amdhsa_dx10_clamp 1
; GFX9-NEXT: .amdhsa_ieee_mode 1
; GFX9-NEXT: .amdhsa_fp16_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
; GFX9-NEXT: .end_amdhsa_kernel
; GFX9-NEXT: .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  ; volatile keeps the otherwise-unused store (and thus the stack object) alive.
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Case 3 - the alloca is only 4-byte aligned, but the kernel carries
; alignstack=128 (attribute group #2). The expected store still uses offset:4,
; while the expected .amdhsa_private_segment_fixed_size is 128.
; NOTE(review): the 128 presumably comes from rounding the frame size up to the
; requested stack alignment - confirm against the backend's frame lowering.
define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: s_add_u32 s0, s0, s7
; VI-NEXT: s_addc_u32 s1, s1, 0
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT: s_endpgm
; VI-NEXT: .section .rodata,#alloc
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel alignstack_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT: .amdhsa_next_free_vgpr 1
; VI-NEXT: .amdhsa_next_free_sgpr 8
; VI-NEXT: .amdhsa_reserve_vcc 0
; VI-NEXT: .amdhsa_float_round_mode_32 0
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
; VI-NEXT: .amdhsa_float_denorm_mode_32 3
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT: .amdhsa_dx10_clamp 1
; VI-NEXT: .amdhsa_ieee_mode 1
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT: .amdhsa_exception_int_div_zero 0
; VI-NEXT: .end_amdhsa_kernel
; VI-NEXT: .text
;
; GFX9-LABEL: alignstack_attr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT: s_add_u32 s0, s0, s7
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .section .rodata,#alloc
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT: .amdhsa_dx10_clamp 1
; GFX9-NEXT: .amdhsa_ieee_mode 1
; GFX9-NEXT: .amdhsa_fp16_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
; GFX9-NEXT: .end_amdhsa_kernel
; GFX9-NEXT: .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  ; volatile keeps the otherwise-unused store (and thus the stack object) alive.
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Attribute groups, one per kernel above, in the same order: #0 is the
; baseline, #1 adds the "stackrealign" string attribute, #2 adds alignstack=128.
attributes #0 = { nounwind }
attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }