1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900  < %s | FileCheck -check-prefix=GFX9 %s
3
4; Make sure the stack is never realigned for entry functions.
5
; Entry function (amdgpu_kernel, attribute group #0) that owns a single i32
; stack object in addrspace(5) requesting 128-byte alignment.
; The checks for both the fiji and the gfx900 runs pin the whole instruction
; sequence from %bb.0 through s_endpgm on consecutive lines, so any extra
; instruction (such as stack realignment code) would make them fail.
; Expected: the store uses the fixed offset:128 from s[0:3], and the kernel
; descriptor reports .amdhsa_private_segment_fixed_size 256.
6define amdgpu_kernel void @max_alignment_128() #0 {
7; VI-LABEL: max_alignment_128:
8; VI:       ; %bb.0:
9; VI-NEXT:    s_add_u32 s4, s4, s7
10; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
11; VI-NEXT:    s_add_u32 s0, s0, s7
12; VI-NEXT:    s_addc_u32 s1, s1, 0
13; VI-NEXT:    v_mov_b32_e32 v0, 9
14; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
15; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
16; VI-NEXT:    s_endpgm
17; VI-NEXT:    .section .rodata,#alloc
18; VI-NEXT:    .p2align 6
19; VI-NEXT:    .amdhsa_kernel max_alignment_128
20; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
21; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
22; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
23; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
24; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
25; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
26; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
27; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
28; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
29; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
30; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
31; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
32; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
33; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
34; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
35; VI-NEXT:     .amdhsa_next_free_vgpr 1
36; VI-NEXT:     .amdhsa_next_free_sgpr 8
37; VI-NEXT:     .amdhsa_reserve_vcc 0
38; VI-NEXT:     .amdhsa_float_round_mode_32 0
39; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
40; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
41; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
42; VI-NEXT:     .amdhsa_dx10_clamp 1
43; VI-NEXT:     .amdhsa_ieee_mode 1
44; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
45; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
46; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
47; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
48; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
49; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
50; VI-NEXT:     .amdhsa_exception_int_div_zero 0
51; VI-NEXT:    .end_amdhsa_kernel
52; VI-NEXT:    .text
53;
54; GFX9-LABEL: max_alignment_128:
55; GFX9:       ; %bb.0:
56; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
57; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
58; GFX9-NEXT:    s_add_u32 s0, s0, s7
59; GFX9-NEXT:    s_addc_u32 s1, s1, 0
60; GFX9-NEXT:    v_mov_b32_e32 v0, 9
61; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
62; GFX9-NEXT:    s_endpgm
63; GFX9-NEXT:    .section .rodata,#alloc
64; GFX9-NEXT:    .p2align 6
65; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
66; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
67; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
68; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
69; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
70; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
71; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
72; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
73; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
74; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
75; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
76; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
77; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
78; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
79; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
80; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
81; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
82; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
83; GFX9-NEXT:     .amdhsa_reserve_vcc 0
84; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
85; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
86; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
87; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
88; GFX9-NEXT:     .amdhsa_dx10_clamp 1
89; GFX9-NEXT:     .amdhsa_ieee_mode 1
90; GFX9-NEXT:     .amdhsa_fp16_overflow 0
91; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
92; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
93; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
94; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
95; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
96; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
97; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
98; GFX9-NEXT:    .end_amdhsa_kernel
99; GFX9-NEXT:    .text
; The only stack object: an i32 in addrspace(5) with align 128. The volatile
; store of 9 is the access that appears as v_mov_b32 + buffer_store_dword in
; the expected output above.
100  %alloca.align = alloca i32, align 128, addrspace(5)
101  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
102  ret void
103}
104
; Entry function (amdgpu_kernel) using attribute group #1, which carries the
; "stackrealign" string attribute. Its only stack object is an i32 in
; addrspace(5) with the ordinary align 4.
; The checks for both the fiji and the gfx900 runs pin the whole instruction
; sequence from %bb.0 through s_endpgm on consecutive lines, so the attribute
; must not introduce any extra instructions.
; Expected: the store uses the fixed offset:4 from s[0:3], and the kernel
; descriptor reports .amdhsa_private_segment_fixed_size 8.
105define amdgpu_kernel void @stackrealign_attr() #1 {
106; VI-LABEL: stackrealign_attr:
107; VI:       ; %bb.0:
108; VI-NEXT:    s_add_u32 s4, s4, s7
109; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
110; VI-NEXT:    s_add_u32 s0, s0, s7
111; VI-NEXT:    s_addc_u32 s1, s1, 0
112; VI-NEXT:    v_mov_b32_e32 v0, 9
113; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
114; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
115; VI-NEXT:    s_endpgm
116; VI-NEXT:    .section .rodata,#alloc
117; VI-NEXT:    .p2align 6
118; VI-NEXT:    .amdhsa_kernel stackrealign_attr
119; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
120; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
121; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
122; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
123; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
124; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
125; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
126; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
127; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
128; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
129; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
130; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
131; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
132; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
133; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
134; VI-NEXT:     .amdhsa_next_free_vgpr 1
135; VI-NEXT:     .amdhsa_next_free_sgpr 8
136; VI-NEXT:     .amdhsa_reserve_vcc 0
137; VI-NEXT:     .amdhsa_float_round_mode_32 0
138; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
139; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
140; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
141; VI-NEXT:     .amdhsa_dx10_clamp 1
142; VI-NEXT:     .amdhsa_ieee_mode 1
143; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
144; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
145; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
146; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
147; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
148; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
149; VI-NEXT:     .amdhsa_exception_int_div_zero 0
150; VI-NEXT:    .end_amdhsa_kernel
151; VI-NEXT:    .text
152;
153; GFX9-LABEL: stackrealign_attr:
154; GFX9:       ; %bb.0:
155; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
156; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
157; GFX9-NEXT:    s_add_u32 s0, s0, s7
158; GFX9-NEXT:    s_addc_u32 s1, s1, 0
159; GFX9-NEXT:    v_mov_b32_e32 v0, 9
160; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
161; GFX9-NEXT:    s_endpgm
162; GFX9-NEXT:    .section .rodata,#alloc
163; GFX9-NEXT:    .p2align 6
164; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
165; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
166; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
167; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
168; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
169; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
170; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
171; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
172; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
173; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
174; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
175; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
176; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
177; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
178; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
179; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
180; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
181; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
182; GFX9-NEXT:     .amdhsa_reserve_vcc 0
183; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
184; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
185; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
186; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
187; GFX9-NEXT:     .amdhsa_dx10_clamp 1
188; GFX9-NEXT:     .amdhsa_ieee_mode 1
189; GFX9-NEXT:     .amdhsa_fp16_overflow 0
190; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
191; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
192; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
193; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
194; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
195; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
196; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
197; GFX9-NEXT:    .end_amdhsa_kernel
198; GFX9-NEXT:    .text
; The only stack object: an i32 in addrspace(5) with align 4. The volatile
; store of 9 is the access that appears as v_mov_b32 + buffer_store_dword in
; the expected output above.
199  %alloca.align = alloca i32, align 4, addrspace(5)
200  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
201  ret void
202}
203
; Entry function (amdgpu_kernel) using attribute group #2, which carries
; alignstack=128. Its only stack object is an i32 in addrspace(5) with the
; ordinary align 4.
; The checks for both the fiji and the gfx900 runs pin the whole instruction
; sequence from %bb.0 through s_endpgm on consecutive lines, so the attribute
; must not introduce any extra instructions.
; Expected: the store still uses the fixed offset:4 from s[0:3], while the
; kernel descriptor reports .amdhsa_private_segment_fixed_size 128.
204define amdgpu_kernel void @alignstack_attr() #2 {
205; VI-LABEL: alignstack_attr:
206; VI:       ; %bb.0:
207; VI-NEXT:    s_add_u32 s4, s4, s7
208; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
209; VI-NEXT:    s_add_u32 s0, s0, s7
210; VI-NEXT:    s_addc_u32 s1, s1, 0
211; VI-NEXT:    v_mov_b32_e32 v0, 9
212; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
213; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
214; VI-NEXT:    s_endpgm
215; VI-NEXT:    .section .rodata,#alloc
216; VI-NEXT:    .p2align 6
217; VI-NEXT:    .amdhsa_kernel alignstack_attr
218; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
219; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
220; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
221; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
222; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
223; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
224; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
225; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
226; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
227; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
228; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
229; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
230; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
231; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
232; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
233; VI-NEXT:     .amdhsa_next_free_vgpr 1
234; VI-NEXT:     .amdhsa_next_free_sgpr 8
235; VI-NEXT:     .amdhsa_reserve_vcc 0
236; VI-NEXT:     .amdhsa_float_round_mode_32 0
237; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
238; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
239; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
240; VI-NEXT:     .amdhsa_dx10_clamp 1
241; VI-NEXT:     .amdhsa_ieee_mode 1
242; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
243; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
244; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
245; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
246; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
247; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
248; VI-NEXT:     .amdhsa_exception_int_div_zero 0
249; VI-NEXT:    .end_amdhsa_kernel
250; VI-NEXT:    .text
251;
252; GFX9-LABEL: alignstack_attr:
253; GFX9:       ; %bb.0:
254; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
255; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
256; GFX9-NEXT:    s_add_u32 s0, s0, s7
257; GFX9-NEXT:    s_addc_u32 s1, s1, 0
258; GFX9-NEXT:    v_mov_b32_e32 v0, 9
259; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
260; GFX9-NEXT:    s_endpgm
261; GFX9-NEXT:    .section .rodata,#alloc
262; GFX9-NEXT:    .p2align 6
263; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
264; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
265; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
266; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
267; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
268; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
269; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
270; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
271; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
272; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
273; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
274; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
275; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
276; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
277; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
278; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
279; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
280; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
281; GFX9-NEXT:     .amdhsa_reserve_vcc 0
282; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
283; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
284; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
285; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
286; GFX9-NEXT:     .amdhsa_dx10_clamp 1
287; GFX9-NEXT:     .amdhsa_ieee_mode 1
288; GFX9-NEXT:     .amdhsa_fp16_overflow 0
289; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
290; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
291; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
292; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
293; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
294; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
295; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
296; GFX9-NEXT:    .end_amdhsa_kernel
297; GFX9-NEXT:    .text
; The only stack object: an i32 in addrspace(5) with align 4. The volatile
; store of 9 is the access that appears as v_mov_b32 + buffer_store_dword in
; the expected output above.
298  %alloca.align = alloca i32, align 4, addrspace(5)
299  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
300  ret void
301}
302
; Attribute groups referenced by the kernels in this file.
; #0: baseline (nounwind only); used by @max_alignment_128, where the
;     alignment request comes from the alloca itself.
303attributes #0 = { nounwind }
; #1: adds the "stackrealign" string attribute; used by @stackrealign_attr.
304attributes #1 = { nounwind "stackrealign" }
; #2: adds alignstack=128; used by @alignstack_attr.
305attributes #2 = { nounwind alignstack=128 }
306