; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s

; Codegen test for llc targeting amdgcn / bonaire. The functions in this file
; store to addrspace(3) addresses that are built from a negated value plus a
; constant; the FileCheck directives attached to each function pin the
; v_sub_i32 / ds_write sequence that is expected in the output.

; Declared but not called by any function in the visible part of this file.
declare void @llvm.AMDGPU.barrier.local() #2
; Source of the per-function input value; the check lines expect it in v0.
declare i32 @llvm.r600.read.tidig.x() #0

; 256 x i32 array in address space 3; store target of
; @write_ds_sub0_offset0_global.
@lds.obj = addrspace(3) global [256 x i32] undef, align 4

; Stores 123 to @lds.obj at element index (0 - id) + 3, where id is the value
; returned by @llvm.r600.read.tidig.x (the checks expect it in v0).
; Expected code: id is shifted left by 2, fed to a v_sub_i32 whose constant
; operand is 0 to form the base address, and the constant 3 * 4 = 12 bytes is
; carried in the ds_write_b32 offset field.
; NOTE(review): this function performs a store but is tagged with attribute
; group #0, which this file defines as { nounwind readnone }, while the call
; site uses #1 -- confirm the attribute numbering is intended.
; GCN-LABEL: {{^}}write_ds_sub0_offset0_global:
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0
; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]]
; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b
; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
define void @write_ds_sub0_offset0_global() #0 {
entry:
  %x.i = call i32 @llvm.r600.read.tidig.x() #1
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  ret void
}

; Byte store of 13 to the address 65535 + ((0 - id) << 2), where id is the
; value returned by @llvm.r600.read.tidig.x.
; Expected code: the shifted id goes through a v_sub_i32 whose constant
; operand is 0, and the whole constant 65535 is carried in the ds_write_b8
; offset field rather than in the subtraction.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
define void @add_x_shl_neg_to_sub_max_offset() #1 {
  %x.i = call i32 @llvm.r600.read.tidig.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65535, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; Byte store of 13 to the address 65536 + ((0 - id) << 2); the constant is one
; greater than in @add_x_shl_neg_to_sub_max_offset.
; Expected code: the constant shows up as the 0x10000 operand of v_sub_i32 and
; ds_write_b8 is emitted without an offset (the last check is anchored at end
; of line right after the data register).
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
  %x.i = call i32 @llvm.r600.read.tidig.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65536, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; The shifted negation ((0 - id) << 2) feeds two different adds (+123 and
; +456), each used as the address of a volatile i32 store of 13.
; Expected code: one v_sub_i32 result is shared as the base register of both
; ds_write_b32 instructions, which differ only in their offset field (123 and
; 456); the negative checks forbid any further v_sub before or between the
; two writes.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
; GCN: s_endpgm
define void @add_x_shl_neg_to_sub_multi_use() #1 {
  %x.i = call i32 @llvm.r600.read.tidig.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add0 = add i32 123, %shl
  %add1 = add i32 456, %shl
  %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr0
  %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr1
  ret void
}

; A single address, 123 + ((0 - id) << 2), is the target of two volatile i32
; stores of 13.
; Expected code: both ds_write_b32 instructions use the same v_sub_i32 result
; as base and the same offset field (123); the negative checks forbid any
; further v_sub before or between the two writes.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN: s_endpgm
define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
  %x.i = call i32 @llvm.r600.read.tidig.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 123, %shl
  %ptr = inttoptr i32 %add to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr
  store volatile i32 13, i32 addrspace(3)* %ptr
  ret void
}

; i64 store of 123, marked align 4, to the address 1019 + ((0 - id) << 2).
; Expected code: the store is emitted as one ds_write2_b32 whose base is the
; v_sub_i32 result (constant operand 0) and whose two offset fields are 254
; and 255.
; NOTE(review): if offset0/offset1 of ds_write2_b32 are counted in 4-byte
; units, 254 corresponds to byte 1016, not the 1019 used in the IR -- confirm
; against the ISA manual that this expected output is really what is wanted.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
  %x.i = call i32 @llvm.r600.read.tidig.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

; i64 store of 123, marked align 4, to the address 1020 + ((0 - id) << 2); the
; constant is one greater than in
; @add_x_shl_neg_to_sub_misaligned_i64_max_offset.
; Expected code: the constant shows up as the 0x3fc (1020) operand of
; v_sub_i32, and the ds_write2_b32 carries only offset1:1 -- the pattern
; leaves no room for an offset0 field and is anchored at end of line.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
  %x.i = call i32 @llvm.r600.read.tidig.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1020, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

; Attribute groups referenced by the declarations, definitions and call sites
; in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind convergent }
