1; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
2; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Test: llvm.amdgcn.init.exec called with an all-ones immediate (i64 -1).
; The checks expect one s_mov_b64 that writes -1 to exec, and they expect it
; to be matched before the v_add_f32, although the fadd precedes the intrinsic
; call in the IR below.
4; GCN-LABEL: {{^}}full_mask:
5; GCN: s_mov_b64 exec, -1
6; GCN: v_add_f32_e32 v0,
7define amdgpu_ps float @full_mask(float %a, float %b) {
8main_body:
9  %s = fadd float %a, %b
; The exec initialisation is placed after the fadd on purpose: the expected
; output above still has the exec write first.
10  call void @llvm.amdgcn.init.exec(i64 -1)
11  ret float %s
12}
13
; Test: llvm.amdgcn.init.exec called with an arbitrary immediate that is not
; all ones. The decimal argument 123456 equals 0x1e240, which is the literal
; the checks expect to be moved into exec. As in full_mask, the exec write is
; expected before the v_add_f32.
14; GCN-LABEL: {{^}}partial_mask:
15; GCN: s_mov_b64 exec, 0x1e240
16; GCN: v_add_f32_e32 v0,
17define amdgpu_ps float @partial_mask(float %a, float %b) {
18main_body:
19  %s = fadd float %a, %b
; 123456 == 0x1e240 (see the expected s_mov_b64 above).
20  call void @llvm.amdgcn.init.exec(i64 123456)
21  ret float %s
22}
23
; Test: llvm.amdgcn.init.exec.from.input where the count comes from the fourth
; inreg i32 argument and the bit offset is 8. The checks expect the count to be
; read from s3. Expected sequence:
;   - s_bfe_u32 extracts a bitfield from s3; the low half of 0x70008 matches
;     the offset argument (8). The upper 0x7 is presumably the field width in
;     the s_bfe operand encoding -- TODO confirm against the ISA manual.
;   - s_bfm_b64 builds the exec mask from the extracted value.
;   - s_cmp_eq_u32 / s_cmov_b64 special-case an extracted value of 64 by
;     writing -1 to exec.
; The v_add_f32 is expected only after exec has been set up.
24; GCN-LABEL: {{^}}input_s3off8:
25; GCN: s_bfe_u32 s0, s3, 0x70008
26; GCN: s_bfm_b64 exec, s0, 0
27; GCN: s_cmp_eq_u32 s0, 64
28; GCN: s_cmov_b64 exec, -1
29; GCN: v_add_f32_e32 v0,
; The three unnamed inreg i32 parameters are unused; %count is the fourth.
30define amdgpu_ps float @input_s3off8(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
31main_body:
32  %s = fadd float %a, %b
33  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
34  ret float %s
35}
36
; Test: llvm.amdgcn.init.exec.from.input where the count comes from the first
; inreg i32 argument and the bit offset is 19. The checks expect the count to
; be read from s0 and the extracted value to be written back to s0. The low
; half of 0x70013 matches the offset argument (0x13 == 19). The rest of the
; expected sequence (s_bfm_b64, compare against 64, conditional move of -1
; into exec, then the v_add_f32) has the same shape as in input_s3off8.
37; GCN-LABEL: {{^}}input_s0off19:
38; GCN: s_bfe_u32 s0, s0, 0x70013
39; GCN: s_bfm_b64 exec, s0, 0
40; GCN: s_cmp_eq_u32 s0, 64
41; GCN: s_cmov_b64 exec, -1
42; GCN: v_add_f32_e32 v0,
43define amdgpu_ps float @input_s0off19(i32 inreg %count, float %a, float %b) {
44main_body:
45  %s = fadd float %a, %b
46  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
47  ret float %s
48}
49
; Test: the value passed to llvm.amdgcn.init.exec.from.input is also used by
; ordinary code after the intrinsic call. The checks expect the extracted
; count to go into s1 rather than overwriting s0, and the later vector add to
; still read the original input from s0. The add mnemonic pattern accepts
; both v_add_u32 and v_add_nc_u32, covering both RUN configurations.
50; GCN-LABEL: {{^}}reuse_input:
51; GCN: s_bfe_u32 s1, s0, 0x70013
52; GCN: s_bfm_b64 exec, s1, 0
53; GCN: s_cmp_eq_u32 s1, 64
54; GCN: s_cmov_b64 exec, -1
55; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
56define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
57main_body:
; Here the exec initialisation comes first in the IR; %count is reused below.
58  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
59  %s = add i32 %a, %count
60  %f = sitofp i32 %s to float
61  ret float %f
62}
63
; Test: variant of reuse_input in which the add and sitofp that consume %count
; appear before the intrinsic call in the IR. The expected output is the same
; as for reuse_input: the exec setup sequence (using s1 as the temporary) is
; matched first, and the vector add that reads s0 is matched after it.
64; GCN-LABEL: {{^}}reuse_input2:
65; GCN: s_bfe_u32 s1, s0, 0x70013
66; GCN: s_bfm_b64 exec, s1, 0
67; GCN: s_cmp_eq_u32 s1, 64
68; GCN: s_cmov_b64 exec, -1
69; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
70define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
71main_body:
72  %s = add i32 %a, %count
73  %f = sitofp i32 %s to float
; The statement order relative to reuse_input is the point of this test; do
; not move this call.
74  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
75  ret float %f
76}
77
; Regression test: llvm.amdgcn.init.exec immediately followed by unreachable.
; Only the function label is checked; no instruction sequence is pinned.
78; GCN-LABEL: {{^}}init_unreachable:
79;
80; This used to crash.
81define amdgpu_ps void @init_unreachable() {
82main_body:
83  call void @llvm.amdgcn.init.exec(i64 -1)
; The block ends in unreachable instead of a return.
84  unreachable
85}
86
; Declarations of the two intrinsics exercised by the tests in this file:
;   - llvm.amdgcn.init.exec takes a single i64; the tests pass it immediate
;     values and expect that value to be moved into exec.
;   - llvm.amdgcn.init.exec.from.input takes two i32 operands; the tests pass
;     an inreg argument and a constant bit offset.
; Both declarations use attribute group #1, defined below as convergent.
87declare void @llvm.amdgcn.init.exec(i64) #1
88declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1
89
90attributes #1 = { convergent }
91