1; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
3
; Tests for indirect addressing on SI (also run for tonga below), which is
; implemented using dynamic indexing of vectors.
6
; Dynamic extract from a constant <4 x float> at index %in + 1.
; Expects the four element constants to be moved into VGPRs (3.0 is matched
; by its bit pattern 0x40400000), then a write to m0 that is directly
; followed by v_movrels.
; CHECK-LABEL: {{^}}extract_w_offset:
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %dyn.idx = add i32 %in, 1
  %picked = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %dyn.idx
  store float %picked, float addrspace(1)* %out
  ret void
}
21
; Dynamic extract at index %in + 1 from a vector that is computed rather than
; constant: %or.val is OR'ed with <1, 2, 3, 4> first. The expected code does
; the four ORs with scalar instructions and then copies each result from an
; SGPR into a VGPR before the m0 write / v_movrels pair.
; XXX: Could do v_or_b32 directly
; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector:
; CHECK-DAG: s_or_b32
; CHECK-DAG: s_or_b32
; CHECK-DAG: s_or_b32
; CHECK-DAG: s_or_b32
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
entry:
  %dyn.idx = add i32 %in, 1
  %ored = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
  %picked = extractelement <4 x i32> %ored, i32 %dyn.idx
  store i32 %picked, i32 addrspace(1)* %out
  ret void
}
42
; Dynamic extract from a constant <4 x float> using %in directly as the
; index (no constant added to it). Same expectations as the variant with an
; offset: four constant moves, an m0 write, then v_movrels on the next line.
; CHECK-LABEL: {{^}}extract_wo_offset:
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %picked = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
  store float %picked, float addrspace(1)* %out
  ret void
}
56
; Dynamic extract from a constant <4 x i32> where a negative constant is
; added to the index (%offset - 512). The index comes from a function
; argument and m0 is expected to be produced by a single s_add_i32 from an
; SGPR.
; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; The destination VGPR is matched with [0-9]+ so the test does not depend on
; the register allocator picking a single-digit register.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v0
define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
68
; Like extract_neg_offset_sgpr (index = %offset - 512), but the vector being
; indexed is the OR of two <4 x i32> arguments instead of a constant.
; CHECK-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
; The offset depends on the register that holds the first element of the vector.
; The destination VGPR is matched with [0-9]+ so the test does not depend on
; the register allocator picking a single-digit register.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v0
define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %or = or <4 x i32> %vec0, %vec1
  %value = extractelement <4 x i32> %or, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
81
; Dynamic extract from a constant <4 x i32> where the index is derived from
; @llvm.r600.read.tidig.x (minus 512) instead of a function argument. The
; expected sequence reads the index with v_readfirstlane_b32, adjusts m0 in
; place, performs v_movrels on the very next line, and later branches on
; s_cbranch_execnz.
; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; The destination VGPR is matched with [0-9]+ so the test does not depend on
; the register allocator picking a single-digit register.
; CHECK: v_readfirstlane_b32
; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]+}}, v0
; CHECK: s_cbranch_execnz
define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.r600.read.tidig.x() #1
  %index = add i32 %id, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
96
; Dynamic insert of 5.0 into a constant <4 x float> at index %in + 1. The
; value stored to %out is element 2 of the updated vector. Expects an m0
; write directly followed by v_movreld.
; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %dyn.idx = add i32 %in, 1
  %updated = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %dyn.idx
  %lane2 = extractelement <4 x float> %updated, i32 2
  store float %lane2, float addrspace(1)* %out
  ret void
}
108
; Dynamic insert of 5.0 into a constant <4 x float> using %in directly as the
; index; element 2 of the updated vector is then stored to %out. Expects an
; m0 write directly followed by v_movreld.
; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %updated = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
  %lane2 = extractelement <4 x float> %updated, i32 2
  store float %lane2, float addrspace(1)* %out
  ret void
}
119
; Dynamic insert of the constant 5 into a constant <4 x i32> at index
; %offset - 512; the whole updated vector is stored to %out. The %in
; argument is not referenced by the body.
; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
entry:
  %neg.idx = add i32 %offset, -512
  %updated = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %neg.idx
  store <4 x i32> %updated, <4 x i32> addrspace(1)* %out
  ret void
}
131
; The vector indexed into is originally loaded into an SGPR rather
; than built with a reg_sequence.
; Here the vector is the %vec argument; 5 is inserted at index %offset - 512
; and the updated vector is stored to %out. The %in argument is not
; referenced by the body.

; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
; The offset depends on the register that holds the first element of the vector.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
entry:
  %neg.idx = add i32 %offset, -512
  %updated = insertelement <4 x i32> %vec, i32 5, i32 %neg.idx
  store <4 x i32> %updated, <4 x i32> addrspace(1)* %out
  ret void
}
146
; Dynamic insert of 5 into a constant <4 x i32> where the index is derived
; from @llvm.r600.read.tidig.x (minus 512). The expected sequence reads the
; index with v_readfirstlane_b32, adjusts m0 in place, performs v_movreld on
; the very next line, and later branches on s_cbranch_execnz. The %in
; argument is not referenced by the body.
; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
; CHECK: s_cbranch_execnz
define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %neg.idx = add i32 %tid, -512
  %updated = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %neg.idx
  store <4 x i32> %updated, <4 x i32> addrspace(1)* %out
  ret void
}
161
; Same shape as insert_neg_offset_vgpr, but the constant added to the thread
; id is -16 instead of -512. The add into m0 is expected to print its
; immediate as a negative decimal rather than as a 0xfffffe.. literal.
; The %in argument is not referenced by the body.
; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
; CHECK: s_cbranch_execnz
define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %neg.idx = add i32 %tid, -16
  %updated = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %neg.idx
  store <4 x i32> %updated, <4 x i32> addrspace(1)* %out
  ret void
}
176
; Intrinsic called by the *_vgpr tests in this file to obtain an index that
; does not come from a function argument. Attribute group #1 marks the
; declaration and each call site as nounwind and readnone.
declare i32 @llvm.r600.read.tidig.x() #1
attributes #1 = { nounwind readnone }
179