; RUN: llc -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN,O2 %s
; RUN: llc -O0 -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s

; GCN-LABEL: {{^}}zext_grp_size_128:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_128
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
; Kernel in attribute group #0 (flat work-group size "64,128"). Each work-item
; id (x, y, z) is masked with 127 and stored to consecutive i32 slots of %arg.
; The checks preceding this function expect every id call to be annotated with
; a [0, 128) range and expect no and_b32 in the generated code for the masks.
define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
bb:
  ; x id -> %arg[0]
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 127
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ; y id -> %arg[1]
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 127
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  ; z id -> %arg[2]
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 127
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_32x4x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
; Kernel with !reqd_work_group_size !0, i.e. {32, 4, 1}. The x, y and z ids are
; masked with 31, 3 and 1 respectively and stored to %arg[0..2]. The checks
; preceding this function expect per-dimension ranges of [0, 32), [0, 4) and
; [0, 1) on the id calls and no and_b32 in the generated code.
define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
  ; x id -> %arg[0]
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 31
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ; y id -> %arg[1]
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  ; z id -> %arg[2]
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 1
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; GCN-LABEL: {{^}}zext_grp_size_1x1x1:
; GCN-NOT: and_b32

; When EarlyCSE is not run, this call produces a range max with 0 active bits,
; which is a special case because an AssertZext from width 0 is invalid.
; OPT-LABEL: @zext_grp_size_1x1x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !4
; Kernel with !reqd_work_group_size !1, i.e. {1, 1, 1}. Only the x id is read;
; it is masked with 1 and stored to %arg[0]. The checks preceding this function
; expect a [0, 1) range on the call and no and_b32 in the generated code.
define amdgpu_kernel void @zext_grp_size_1x1x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !1 {
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 1
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}zext_grp_size_512:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_512
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !6
; Kernel in attribute group #1 (flat work-group size "512,512"). Each work-item
; id is masked with 65535 (low 16 bits) and stored to %arg[0..2]. The checks
; preceding this function expect a [0, 512) range on every id call and no
; and_b32 in the generated code.
define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
bb:
  ; x id -> %arg[0]
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 65535
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ; y id -> %arg[1]
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 65535
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  ; z id -> %arg[2]
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 65535
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; GCN-LABEL: {{^}}func_test_workitem_id_x_known_max_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32

; OPT-LABEL: @func_test_workitem_id_x_known_max_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
; Non-kernel function (no amdgpu_kernel calling convention) in attribute group
; #0. The x id is masked with 1023 and stored to %out. The checks preceding
; this function expect the same [0, 128) range annotation as the #0 kernel,
; but, for the default-optimization run, exactly one v_and_b32 with 0x3ff to
; remain in the generated code.
define void @func_test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}func_test_workitem_id_x_default_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32

; OPT-LABEL: @func_test_workitem_id_x_default_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !7
; Non-kernel function in attribute group #4, which sets no flat work-group
; size. The x id is masked with 1023 and stored to %out. The checks preceding
; this function expect a [0, 1024) range annotation on the call and, for the
; default-optimization run, exactly one v_and_b32 with 0x3ff in the output.
define void @func_test_workitem_id_x_default_range(i32 addrspace(1)* nocapture %out) #4 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; Work-item id intrinsics called by the tests in this file, one per dimension.
declare i32 @llvm.amdgcn.workitem.id.x() #2

declare i32 @llvm.amdgcn.workitem.id.y() #2

declare i32 @llvm.amdgcn.workitem.id.z() #2

; Attribute groups.
;   #0 - flat work-group size "64,128"; used by the 128, 32x4x1 and 1x1x1
;        kernels and by func_test_workitem_id_x_known_max_range.
;   #1 - flat work-group size "512,512"; used by zext_grp_size_512.
;   #2 - applied to the work-item id intrinsic declarations.
;   #3 - not referenced by anything visible in this file.
;   #4 - no work-group size hint; used by func_test_workitem_id_x_default_range.
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }

; !reqd_work_group_size operands: !0 is attached to zext_grp_size_32x4x1 and
; !1 to zext_grp_size_1x1x1.
!0 = !{i32 32, i32 4, i32 1}
!1 = !{i32 1, i32 1, i32 1}

; OPT: !0 = !{i32 0, i32 128}
; OPT: !1 = !{i32 32, i32 4, i32 1}
; OPT: !2 = !{i32 0, i32 32}
; OPT: !3 = !{i32 0, i32 4}
; OPT: !4 = !{i32 0, i32 1}
; OPT: !5 = !{i32 1, i32 1, i32 1}
; OPT: !6 = !{i32 0, i32 512}
; OPT: !7 = !{i32 0, i32 1024}
