1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4
5; Test gfx9+ s_shl[1-4]_add_u32 pattern matching
6
; Scalar case, shift amount 1: both operands are inreg (s0/s1 in the checks).
; gfx900 is expected to fold the shl+add pair into one s_lshl1_add_u32;
; fiji has to emit a separate s_lshl_b32 followed by s_add_i32.
define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl1_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Scalar case, shift amount 2: both operands are inreg (s0/s1 in the checks).
; gfx900 is expected to fold the shl+add pair into one s_lshl2_add_u32;
; fiji has to emit a separate s_lshl_b32 followed by s_add_i32.
define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Scalar case, shift amount 3: both operands are inreg (s0/s1 in the checks).
; gfx900 is expected to fold the shl+add pair into one s_lshl3_add_u32;
; fiji has to emit a separate s_lshl_b32 followed by s_add_i32.
define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl3_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Scalar case, shift amount 4: both operands are inreg (s0/s1 in the checks).
; gfx900 is expected to fold the shl+add pair into one s_lshl4_add_u32;
; fiji has to emit a separate s_lshl_b32 followed by s_add_i32.
define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl4_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Negative scalar case: a shift amount of 5 is outside the 1-4 range of the
; s_lshl[1-4]_add_u32 instructions, so both targets share one set of checks
; expecting an unfused s_lshl_b32 followed by s_add_i32.
define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl5_add_u32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 5
; GCN-NEXT:    s_add_i32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Vector case, shift amount 1: the arguments are not inreg and show up as v0/v1
; in the checks. gfx900 is expected to select v_lshl_add_u32 with the shift
; amount as an inline operand; fiji emits v_lshlrev_b32 followed by v_add_u32.
define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl1_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl1_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Vector case, shift amount 2: the arguments are not inreg and show up as v0/v1
; in the checks. gfx900 is expected to select v_lshl_add_u32 with the shift
; amount as an inline operand; fiji emits v_lshlrev_b32 followed by v_add_u32.
define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl2_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl2_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Vector case, shift amount 3: the arguments are not inreg and show up as v0/v1
; in the checks. gfx900 is expected to select v_lshl_add_u32 with the shift
; amount as an inline operand; fiji emits v_lshlrev_b32 followed by v_add_u32.
define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl3_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl3_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Vector case, shift amount 4: the arguments are not inreg and show up as v0/v1
; in the checks. gfx900 is expected to select v_lshl_add_u32 with the shift
; amount as an inline operand; fiji emits v_lshlrev_b32 followed by v_add_u32.
define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl4_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl4_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Vector case, shift amount 5: unlike the scalar s_lshl[1-4]_add_u32 forms,
; v_lshl_add_u32 takes the shift amount as an operand, so the checks expect
; gfx900 to still fold here. fiji emits v_lshlrev_b32 followed by v_add_u32.
define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl5_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 5, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl5_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; FIXME: Use v_lshl_add_u32 here.
; The shift is scalar (%src0 is inreg, s0 in the checks), but the add is
; vector (%src1 is v0). Both targets currently emit s_lshl_b32 followed by
; v_add_u32. The i32 sum is bitcast to float for the return.
define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl1_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl1_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Mixed case, shift amount 2: %src0 is inreg (s0 in the checks) and %src1 is
; in v0. The checks record the current output on both targets: a scalar
; s_lshl_b32 followed by a vector v_add_u32, with no fused shift-add.
define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl2_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl2_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Mixed case, shift amount 3: %src0 is inreg (s0 in the checks) and %src1 is
; in v0. The checks record the current output on both targets: a scalar
; s_lshl_b32 followed by a vector v_add_u32, with no fused shift-add.
define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl3_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl3_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Mixed case, shift amount 4: %src0 is inreg (s0 in the checks) and %src1 is
; in v0. The checks record the current output on both targets: a scalar
; s_lshl_b32 followed by a vector v_add_u32, with no fused shift-add.
define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl4_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl4_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Mixed case, shift amount 5: %src0 is inreg (s0 in the checks) and %src1 is
; in v0. The checks record the current output on both targets: a scalar
; s_lshl_b32 followed by a vector v_add_u32, with no fused shift-add.
define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl5_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 5
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl5_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 5
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; <2 x i32> scalar case with a splat shift amount of 1. The checks expect
; gfx900 to emit one s_lshl1_add_u32 per element (s0/s2 and s1/s3), while fiji
; emits both s_lshl_b32 shifts first and then both s_add_i32 adds.
define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl1_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl1_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 1, i32 1>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> scalar case with a splat shift amount of 2. The checks expect
; gfx900 to emit one s_lshl2_add_u32 per element (s0/s2 and s1/s3), while fiji
; emits both s_lshl_b32 shifts first and then both s_add_i32 adds.
define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl2_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_lshl_b32 s1, s1, 2
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 2>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> scalar case with a splat shift amount of 3. The checks expect
; gfx900 to emit one s_lshl3_add_u32 per element (s0/s2 and s1/s3), while fiji
; emits both s_lshl_b32 shifts first and then both s_add_i32 adds.
define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl3_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl3_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 3
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 3, i32 3>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> scalar case with a splat shift amount of 4. The checks expect
; gfx900 to emit one s_lshl4_add_u32 per element (s0/s2 and s1/s3), while fiji
; emits both s_lshl_b32 shifts first and then both s_add_i32 adds.
define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl4_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 4, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; <2 x i32> scalar case with a non-splat shift vector <2, 4>. The checks
; expect gfx900 to pick the matching instruction per element: s_lshl2_add_u32
; for element 0 and s_lshl4_add_u32 for element 1. fiji emits both s_lshl_b32
; shifts and then both s_add_i32 adds.
define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl_2_4_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl_2_4_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_lshl_b32 s1, s1, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; Multi-use case, shift amount 4: %shl is returned as element 0 and also feeds
; the add returned as element 1. The shared checks expect no fused shift-add
; on either target: s_lshl_b32 writes s0 and s_add_i32 writes s1.
define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl4_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 4
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

; Multi-use case, shift amount 3: %shl is returned as element 0 and also feeds
; the add returned as element 1. The shared checks expect no fused shift-add
; on either target: s_lshl_b32 writes s0 and s_add_i32 writes s1.
define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl3_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 3
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

; Multi-use case, shift amount 2: %shl is returned as element 0 and also feeds
; the add returned as element 1. The shared checks expect no fused shift-add
; on either target: s_lshl_b32 writes s0 and s_add_i32 writes s1.
define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl2_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

; Multi-use case, shift amount 1: %shl is returned as element 0 and also feeds
; the add returned as element 1. The shared checks expect no fused shift-add
; on either target: s_lshl_b32 writes s0 and s_add_i32 writes s1.
define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl1_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 1
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}