; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}s_add_i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

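; Test vector adds; one ADD_INT per element is expected.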
; FUNC-LABEL: {{^}}s_add_v2i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v4i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v8i32:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v16i32:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

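; Test an add of two values loaded at a thread-id computed address.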
; FUNC-LABEL: {{^}}v_add_i32:
define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %b = load volatile i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

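; Test an add of a loaded value and the immediate 123.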
; FUNC-LABEL: {{^}}v_add_imm_i32:
define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %result = add i32 %a, 123
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

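; Test scalar i64 add; per the checks below it should split into ADD_INT
; plus ADDC_UINT for the carry, with no subtract emitted.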
; FUNC-LABEL: {{^}}add64:
; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instructions can't read SGPRs, because they
; use VCC.  The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
; to a VGPR before doing the add.

; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}

declare i32 @llvm.r600.read.tidig.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }