1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
2
; Intrinsic yielding the x component of the workitem id. Every test in this
; file derives its store address from this value.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; 256 x i32 array placed in addrspace(3); it serves as the GEP base in
; write_ds_sub0_offset0_global.
@lds.obj = addrspace(3) global [256 x i32] undef, align 4
6
; write_ds_sub0_offset0_global
; Stores 123 to element (3 - workitem.id.x) of @lds.obj. The expectations
; below require the negated, scaled workitem id to become one v_sub base
; pointer, and the constant element index 3 to be folded into the DS
; instruction as byte offset 12.
;
; The function uses attribute group #1 (nounwind) because it writes memory;
; group #0 (nounwind readnone) is only valid on the side-effect-free
; intrinsic call, matching the convention of the other tests in this file.
; GCN-LABEL: {{^}}write_ds_sub0_offset0_global:
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0
; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]]
; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b
; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
define void @write_ds_sub0_offset0_global() #1 {
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  ; Negative element index: 0 - workitem id.
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  ; Constant +3 elements (12 bytes) on top of the variable index.
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  ret void
}
21
; add_x_shl_neg_to_sub_max_offset
; Byte store to address 65535 + ((0 - workitem.id.x) << 2) in addrspace(3).
; The expectations below require the constant 65535 to show up in the DS
; offset field while the negated, scaled id comes from a single v_sub from 0.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
define void @add_x_shl_neg_to_sub_max_offset() #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.neg = sub i32 0, %tid
  %scaled.neg = shl i32 %tid.neg, 2
  ; Constant is the left operand of the add, as in the pattern under test.
  %addr = add i32 65535, %scaled.neg
  %dst = inttoptr i32 %addr to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %dst
  ret void
}
36
; add_x_shl_neg_to_sub_max_offset_p1
; Same shape as add_x_shl_neg_to_sub_max_offset, but with the constant one
; larger (65536). The expectations below require the constant to be absorbed
; into the v_sub (0x10000 - scaled id) and the DS write to carry no offset.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.neg = sub i32 0, %tid
  %scaled.neg = shl i32 %tid.neg, 2
  %addr = add i32 65536, %scaled.neg
  %dst = inttoptr i32 %addr to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %dst
  ret void
}
51
; add_x_shl_neg_to_sub_multi_use
; The negated, scaled workitem id feeds two different constant adds (123 and
; 456), each used by its own volatile i32 store. The expectations below
; require one shared v_sub base and both constants placed in the DS offset
; fields, with no additional v_sub emitted before either store.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
; GCN: s_endpgm
define void @add_x_shl_neg_to_sub_multi_use() #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.neg = sub i32 0, %tid
  %scaled.neg = shl i32 %tid.neg, 2
  %addr.lo = add i32 123, %scaled.neg
  %addr.hi = add i32 456, %scaled.neg
  ; Volatile keeps both stores and their relative order.
  %dst.lo = inttoptr i32 %addr.lo to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %dst.lo
  %dst.hi = inttoptr i32 %addr.hi to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %dst.hi
  ret void
}
73
; add_x_shl_neg_to_sub_multi_use_same_offset
; One address (123 + negated, scaled workitem id) is stored to twice with
; volatile i32 stores. The expectations below require a single v_sub base
; that both DS writes reuse with offset 123.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN: s_endpgm
define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.neg = sub i32 0, %tid
  %scaled.neg = shl i32 %tid.neg, 2
  %addr = add i32 123, %scaled.neg
  %dst = inttoptr i32 %addr to i32 addrspace(3)*
  ; Two volatile stores through the same pointer; neither may be dropped.
  store volatile i32 13, i32 addrspace(3)* %dst
  store volatile i32 13, i32 addrspace(3)* %dst
  ret void
}
93
; add_x_shl_neg_to_sub_misaligned_i64_max_offset
; i64 store with only 4-byte alignment, expected to be emitted as one
; ds_write2_b32 whose two offsets are expressed in dword units.
;
; The constant is 1016 (= 254 * 4): the largest byte offset for which both
; dword offsets (254 and 255) appear in the write2 offset fields, per the
; expectation below. The next dword-aligned value, 1020, is covered by the
; _p1 variant. A constant such as 1019 would make the address always
; congruent to 3 modulo 4, contradicting the `align 4` on the store, and
; offset0:254 (byte 1016) would not address the requested location.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  ; 1016 keeps the address a multiple of 4, as the store's alignment claims.
  %add = add i32 1016, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}
107
; add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1
; 4-byte-aligned i64 store at 1020 + negated, scaled workitem id. The
; expectations below require the constant to be absorbed into the v_sub
; (0x3fc - scaled id) and the ds_write2_b32 to carry only offset1:1.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.neg = sub i32 0, %tid
  %scaled.neg = shl i32 %tid.neg, 2
  %addr = add i32 1020, %scaled.neg
  %dst = inttoptr i32 %addr to i64 addrspace(3)*
  ; The i64 value is only 4-byte aligned, hence the paired 32-bit write.
  store i64 123, i64 addrspace(3)* %dst, align 4
  ret void
}
121
; Attribute groups.
; #0: applied to the workitem-id intrinsic declaration and its call sites.
attributes #0 = { nounwind readnone }
; #1: applied to test functions that store to addrspace(3).
attributes #1 = { nounwind }
; #2: not referenced in the portion of the file shown here.
attributes #2 = { nounwind convergent }
125