; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Baseline case: %a, %b and %c are ordinary function arguments and the clamp
; immarg is false. Every run configuration is expected to select a single
; v_dot2_i32_i16 with no operand modifiers.
define i32 @v_sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX906-LABEL: v_sdot2:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 false)
  ret i32 %r
}

; Same operands as the baseline case, but the clamp immarg is true. The
; selected v_dot2_i32_i16 is expected to carry the `clamp` modifier.
define i32 @v_sdot2_clamp(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX906-LABEL: v_sdot2_clamp:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 clamp
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_clamp:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 clamp
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_clamp:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 true)
  ret i32 %r
}

; All three operands are `inreg` arguments of an amdgpu_ps function, so they
; arrive in s0, s1 and s2. The integer result is bitcast to float only so it
; can be returned from the shader.
; The gfx906 and gfx908 checks copy s1 and s2 into VGPRs and read only s0
; directly; the gfx10 checks read s0 and s1 directly and copy only s2.
; NOTE(review): presumably this reflects a per-target limit on how many scalar
; registers one VALU instruction may read -- confirm against the ISA docs.
define amdgpu_ps float @v_sdot2_sgpr_sgpr_sgpr(<2 x i16> inreg %a, <2 x i16> inreg %b, i32 inreg %c) {
; GFX906-LABEL: v_sdot2_sgpr_sgpr_sgpr:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    v_mov_b32_e32 v0, s1
; GFX906-NEXT:    v_mov_b32_e32 v1, s2
; GFX906-NEXT:    v_dot2_i32_i16 v0, s0, v0, v1
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX908-LABEL: v_sdot2_sgpr_sgpr_sgpr:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    v_mov_b32_e32 v0, s1
; GFX908-NEXT:    v_mov_b32_e32 v1, s2
; GFX908-NEXT:    v_dot2_i32_i16 v0, s0, v0, v1
; GFX908-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_sdot2_sgpr_sgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_dot2_i32_i16 v0, s0, s1, v0
; GFX10-NEXT:    ; return to shader part epilog
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 false)
  %cast = bitcast i32 %r to float
  ret float %cast
}

; Operand a is the constant splat <4, 4>. The checks expect it to be encoded
; as the immediate 4 in the first source slot, with that slot's op_sel_hi bit
; cleared (op_sel_hi:[0,1,1]) instead of being materialized in a register.
define i32 @v_sdot2_inline_literal_a(<2 x i16> %b, i32 %c) {
; GFX906-LABEL: v_sdot2_inline_literal_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_inline_literal_a:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
  ret i32 %r
}

; Operand b is the constant splat <4, 4>. The checks expect it to be encoded
; as the immediate 4 in the second source slot, with that slot's op_sel_hi bit
; cleared (op_sel_hi:[1,0,1]).
define i32 @v_sdot2_inline_literal_b(<2 x i16> %a, i32 %c) {
; GFX906-LABEL: v_sdot2_inline_literal_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_inline_literal_b:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
  ret i32 %r
}

; Operands a and b are the constant splats <8, 8> and <4, 4>; only %c comes
; from a register. The checks expect both splats as immediates with both of
; their op_sel_hi bits cleared (op_sel_hi:[0,0,1]).
; The %a parameter is not referenced by the body. It must stay in the
; signature because it is why %c is checked in v1 rather than v0.
define i32 @v_sdot2_inline_literal_a_b(<2 x i16> %a, i32 %c) {
; GFX906-LABEL: v_sdot2_inline_literal_a_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_inline_literal_a_b:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_a_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
  ret i32 %r
}

; All three operands are constants: splats <8, 8> and <4, 4> for a and b, and
; the scalar 8 for c. The checks expect the instruction to take three
; immediates and no register sources, with op_sel_hi:[0,0,1].
define i32 @v_sdot2_inline_literal_a_b_c() {
; GFX906-LABEL: v_sdot2_inline_literal_a_b_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_inline_literal_a_b_c:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_a_b_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
  ret i32 %r
}

; Operand c is the scalar constant 7 while a and b come from registers. The
; checks expect 7 as an immediate in the third source slot and no op_sel_hi
; annotation on the instruction.
define i32 @v_sdot2_inline_literal_c(<2 x i16> %a, <2 x i16> %b) {
; GFX906-LABEL: v_sdot2_inline_literal_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, 7
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_inline_literal_c:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, 7
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, 7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 7, i1 false)
  ret i32 %r
}

; Operand a is a <2 x half> that is negated with fneg and then bitcast to
; <2 x i16>. The checks expect the negation to be folded into the first source
; slot as neg_lo:[1,0,0] neg_hi:[1,0,0], with no separate instruction.
; NOTE(review): confirm against the ISA documentation that neg_lo/neg_hi on
; this integer dot instruction flip only the sign bits, as fneg does.
define i32 @v_sdot2_fneg_a(<2 x half> %a, <2 x i16> %b, i32 %c) {
; GFX906-LABEL: v_sdot2_fneg_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_fneg_a:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fneg_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %cast.neg.a, <2 x i16> %b, i32 %c, i1 false)
  ret i32 %r
}

; Operand b is a <2 x half> that is negated with fneg and then bitcast to
; <2 x i16>. The checks expect the negation to be folded into the second
; source slot as neg_lo:[0,1,0] neg_hi:[0,1,0], with no separate instruction.
; NOTE(review): confirm against the ISA documentation that neg_lo/neg_hi on
; this integer dot instruction flip only the sign bits, as fneg does.
define i32 @v_sdot2_fneg_b(<2 x i16> %a, <2 x half> %b, i32 %c) {
; GFX906-LABEL: v_sdot2_fneg_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_fneg_b:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fneg_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg <2 x half> %b
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %cast.neg.b, i32 %c, i1 false)
  ret i32 %r
}

; Operand c is a float that is negated with fneg and then bitcast to i32.
; Unlike the fneg cases on a and b, the checks expect the negation to remain
; a separate v_xor_b32 with 0x80000000 ahead of an unmodified v_dot2_i32_i16.
define i32 @v_sdot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) {
; GFX906-LABEL: v_sdot2_fnegf32_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_fnegf32_c:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fnegf32_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.c = fneg float %c
  %cast.neg.c = bitcast float %neg.c to i32
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %cast.neg.c, i1 false)
  ret i32 %r
}

; Operand c is a <2 x half> that is negated with fneg and then bitcast to
; i32. The checks expect the negation to remain a separate v_xor_b32 with
; 0x80008000 (the sign bit of each half) ahead of an unmodified
; v_dot2_i32_i16.
define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) {
; GFX906-LABEL: v_sdot2_fnegv2f16_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_fnegv2f16_c:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fnegv2f16_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.c = fneg <2 x half> %c
  %cast.neg.c = bitcast <2 x half> %neg.c to i32
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %cast.neg.c, i1 false)
  ret i32 %r
}

; Operand a has its two i16 elements swapped by a <1, 0> shufflevector before
; the call. The checks expect the swap to be a separate
; `v_alignbit_b32 v0, v0, v0, 16` ahead of an unmodified v_dot2_i32_i16; it is
; not folded into the dot instruction's operand modifiers.
define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX906-LABEL: v_sdot2_shuffle10_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_shuffle10_a:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_shuffle10_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
  ret i32 %r
}

; Operand b has its two i16 elements swapped by a <1, 0> shufflevector before
; the call. The checks expect the swap to be a separate
; `v_alignbit_b32 v1, v1, v1, 16` ahead of an unmodified v_dot2_i32_i16; it is
; not folded into the dot instruction's operand modifiers.
define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX906-LABEL: v_sdot2_shuffle10_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_alignbit_b32 v1, v1, v1, 16
; GFX906-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: v_sdot2_shuffle10_b:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_alignbit_b32 v1, v1, v1, 16
; GFX908-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_shuffle10_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_alignbit_b32 v1, v1, v1, 16
; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
  ret i32 %r
}

; Declaration of the intrinsic exercised by every function in this file. Its
; operands are two <2 x i16> vectors, an i32, and an i1 that is marked immarg
; and so must be a constant at each call site; the tests in this file pass
; `true` only in the clamp test.
declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1 immarg) #0

; Attribute group applied to the intrinsic declaration.
attributes #0 = { nounwind readnone speculatable }
