1; This checks support for insertelement and extractelement.
2
3; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
4; RUN:   | FileCheck %s
5; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \
6; RUN:   | FileCheck %s
7; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -mattr=sse4.1 \
8; RUN:   | FileCheck --check-prefix=SSE41 %s
9; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -mattr=sse4.1 \
10; RUN:   | FileCheck --check-prefix=SSE41 %s
11
12; RUN: %if --need=target_MIPS32 --need=allow_dump \
13; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
14; RUN:   -i %s --args -O2 \
15; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
16; RUN:   --command FileCheck --check-prefix MIPS32 %s
17
18; insertelement operations
19
; Inserts the scalar %elt into element 0 of the <4 x float> %vec and returns
; the updated vector.
20define internal <4 x float> @insertelement_v4f32_0(<4 x float> %vec,
21                                                   float %elt) {
22entry:
23  %res = insertelement <4 x float> %vec, float %elt, i32 0
24  ret <4 x float> %res
; Default x86 runs (no -mattr): a movss is expected in the function body.
25; CHECK-LABEL: insertelement_v4f32_0
26; CHECK: movss
27
; Runs with -mattr=sse4.1 expect an insertps whose immediate is 0x0.
28; SSE41-LABEL: insertelement_v4f32_0
29; SSE41: insertps {{.*}},{{.*}},0x0
30
31; *** a0 - implicit return <4 x float>
32; *** a1 - unused due to alignment of %vec
33; *** a2:a3:sp[16]:sp[20] - %vec
34; *** sp[24] - %elt
35; MIPS32-LABEL: insertelement_v4f32_0
36; *** Load element 2 and 3 of %vec
37; MIPS32: lw [[BV_E2:.*]],
38; MIPS32: lw [[BV_E3:.*]],
39; *** Load %elt
40; MIPS32: lwc1 [[ELT:.*]],
41; *** Insert %elt at %vec[0]
42; MIPS32: mfc1 [[RV_E0:.*]],[[ELT]]
; *** Copy the return pointer out of a0, then store the four result words at
; *** offsets 0, 4, 8 and 12: the new element, a3, and the two loaded elements.
43; MIPS32: move [[RET_PTR:.*]],a0
44; MIPS32: sw [[RV_E0]],0([[RET_PTR]])
45; MIPS32: sw a3,4([[RET_PTR]])
46; MIPS32: sw [[BV_E2]],8([[RET_PTR]])
47; MIPS32: sw [[BV_E3]],12([[RET_PTR]])
48}
49
; Inserts the i32 %elt into element 0 of the <4 x i32> %vec and returns the
; updated vector.
50define internal <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) {
51entry:
52  %res = insertelement <4 x i32> %vec, i32 %elt, i32 0
53  ret <4 x i32> %res
; Default x86 runs: a movd into an xmm register, followed later by a movss.
54; CHECK-LABEL: insertelement_v4i32_0
55; CHECK: movd xmm{{.*}},
56; CHECK: movss
57
; Runs with -mattr=sse4.1 expect a pinsrd whose immediate is 0x0.
58; SSE41-LABEL: insertelement_v4i32_0
59; SSE41: pinsrd {{.*}},{{.*}},0x0
60
61; *** a0:a1:a2:a3 - %vec
62; *** sp[16] - %elt
63; MIPS32-LABEL: insertelement_v4i32_0
64; *** Load %elt
65; MIPS32: lw v0,16(sp)
; *** %elt lands directly in v0; the remaining moves copy a1, a2 and a3 into
; *** v1, a0 and a1 respectively.
66; MIPS32: move v1,a1
67; MIPS32: move a0,a2
68; MIPS32: move a1,a3
69}
70
71
; Inserts the scalar %elt into element 1 of the <4 x float> %vec and returns
; the updated vector.
72define internal <4 x float> @insertelement_v4f32_1(<4 x float> %vec,
73                                                   float %elt) {
74entry:
75  %res = insertelement <4 x float> %vec, float %elt, i32 1
76  ret <4 x float> %res
; Default x86 runs: two shufps instructions are expected, in order.
77; CHECK-LABEL: insertelement_v4f32_1
78; CHECK: shufps
79; CHECK: shufps
80
; Runs with -mattr=sse4.1 expect an insertps whose immediate is 0x10.
81; SSE41-LABEL: insertelement_v4f32_1
82; SSE41: insertps {{.*}},{{.*}},0x10
83
; *** Two words are loaded from 16(sp) and 20(sp), %elt is loaded from 24(sp)
; *** into an FP register and moved to an integer register with mfc1.
; *** The result is stored through a pointer copied from a0: a2 at offset 0,
; *** the new element at 4, and the two loaded words at 8 and 12.
84; MIPS32-LABEL: insertelement_v4f32_1
85; MIPS32: lw [[VEC_E2:.*]],16(sp)
86; MIPS32: lw [[VEC_E3:.*]],20(sp)
87; MIPS32: lwc1 [[ELT:.*]],24(sp)
88; MIPS32: mfc1 [[R_E1:.*]],[[ELT]]
89; MIPS32: move [[PTR:.*]],a0
90; MIPS32: sw a2,0([[PTR]])
91; MIPS32: sw [[R_E1]],4([[PTR]])
92; MIPS32: sw [[VEC_E2]],8([[PTR]])
93; MIPS32: sw [[VEC_E3]],12([[PTR]])
94}
95
; Inserts the i32 %elt into element 1 of the <4 x i32> %vec and returns the
; updated vector.
96define internal <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) {
97entry:
98  %res = insertelement <4 x i32> %vec, i32 %elt, i32 1
99  ret <4 x i32> %res
; Default x86 runs: two shufps instructions are expected, in order.
100; CHECK-LABEL: insertelement_v4i32_1
101; CHECK: shufps
102; CHECK: shufps
103
; Runs with -mattr=sse4.1 expect a pinsrd whose immediate is 0x1.
104; SSE41-LABEL: insertelement_v4i32_1
105; SSE41: pinsrd {{.*}},{{.*}},0x1
106
; *** The word loaded from 16(sp) is moved into v1; a0, a2 and a3 are copied
; *** into v0, a0 and a1 respectively.
107; MIPS32-LABEL: insertelement_v4i32_1
108; MIPS32: lw [[ELT:.*]],16(sp)
109; MIPS32: move v1,[[ELT]]
110; MIPS32: move v0,a0
111; MIPS32: move a0,a2
112; MIPS32: move a1,a3
113}
114
; Truncates %elt.arg to i16 and inserts it into element 1 of the <8 x i16>
; %vec, returning the updated vector.
115define internal <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) {
116entry:
117  %elt = trunc i32 %elt.arg to i16
118  %res = insertelement <8 x i16> %vec, i16 %elt, i32 1
119  ret <8 x i16> %res
; Both x86 configurations (default and -mattr=sse4.1) expect a pinsrw.
120; CHECK-LABEL: insertelement_v8i16
121; CHECK: pinsrw
122
123; SSE41-LABEL: insertelement_v8i16
124; SSE41: pinsrw
125
; *** The word loaded from 16(sp) is shifted left by 16. a0 is shifted left and
; *** then right by 16, which clears its upper 16 bits. The two are OR-ed into
; *** v0, and a1, a2, a3 are copied into v1, a0, a1.
126; MIPS32-LABEL: insertelement_v8i16
127; MIPS32: lw [[ELT:.*]],16(sp)
128; MIPS32: sll [[ELT]],[[ELT]],0x10
129; MIPS32: sll a0,a0,0x10
130; MIPS32: srl a0,a0,0x10
131; MIPS32: or v0,[[ELT]],a0
132; MIPS32: move v1,a1
133; MIPS32: move a0,a2
134; MIPS32: move a1,a3
135}
136
; Truncates %elt.arg to i8 and inserts it into element 1 of the <16 x i8>
; %vec, returning the updated vector.
137define internal <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) {
138entry:
139  %elt = trunc i32 %elt.arg to i8
140  %res = insertelement <16 x i8> %vec, i8 %elt, i32 1
141  ret <16 x i8> %res
; Default x86 runs expect a movups, then a lea, then a mov.
142; CHECK-LABEL: insertelement_v16i8
143; CHECK: movups
144; CHECK: lea
145; CHECK: mov
146
; Runs with -mattr=sse4.1 expect a pinsrb instead.
147; SSE41-LABEL: insertelement_v16i8
148; SSE41: pinsrb
149
; *** The word loaded from 16(sp) is masked to 8 bits and shifted left by 8.
; *** lui 0xffff followed by ori 0xff builds the mask 0xffff00ff, which clears
; *** bits 8..15 of a0 before the two values are OR-ed into v0.
; *** NOTE(review): the final "or" names v0 literally, whereas the v16i1 test
; *** in this file uses the captured ELT register there -- confirm intentional.
150; MIPS32-LABEL: insertelement_v16i8
151; MIPS32: lw [[ELT:.*]],16(sp)
152; MIPS32: andi [[ELT]],[[ELT]],0xff
153; MIPS32: sll [[ELT]],[[ELT]],0x8
154; MIPS32: lui [[T:.*]],0xffff
155; MIPS32: ori [[T]],[[T]],0xff
156; MIPS32: and a0,a0,[[T]]
157; MIPS32: or v0,v0,a0
158; MIPS32: move v1,a1
159; MIPS32: move a0,a2
160; MIPS32: move a1,a3
161}
162
; Truncates %elt.arg to i1 and inserts it into element 0 of the <4 x i1> %vec,
; returning the updated vector.
163define internal <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) {
164entry:
165  %elt = trunc i32 %elt.arg to i1
166  %res = insertelement <4 x i1> %vec, i1 %elt, i32 0
167  ret <4 x i1> %res
; Default x86 runs expect a movss.
168; CHECK-LABEL: insertelement_v4i1_0
169; CHECK: movss
170
; Runs with -mattr=sse4.1 expect a pinsrd whose immediate is 0x0.
171; SSE41-LABEL: insertelement_v4i1_0
172; SSE41: pinsrd {{.*}},{{.*}},0x0
173
; *** The word at 16(sp) is loaded directly into v0; a1, a2 and a3 are copied
; *** into v1, a0 and a1.
174; MIPS32-LABEL: insertelement_v4i1_0
175; MIPS32: lw v0,16(sp)
176; MIPS32: move v1,a1
177; MIPS32: move a0,a2
178; MIPS32: move a1,a3
179}
180
; Truncates %elt.arg to i1 and inserts it into element 1 of the <4 x i1> %vec,
; returning the updated vector.
181define internal <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) {
182entry:
183  %elt = trunc i32 %elt.arg to i1
184  %res = insertelement <4 x i1> %vec, i1 %elt, i32 1
185  ret <4 x i1> %res
; Default x86 runs: two shufps instructions are expected, in order.
186; CHECK-LABEL: insertelement_v4i1_1
187; CHECK: shufps
188; CHECK: shufps
189
; Runs with -mattr=sse4.1 expect a pinsrd whose immediate is 0x1.
190; SSE41-LABEL: insertelement_v4i1_1
191; SSE41: pinsrd {{.*}},{{.*}},0x1
192
; *** The word loaded from 16(sp) is moved into v1; a0, a2 and a3 are copied
; *** into v0, a0 and a1 respectively.
193; MIPS32-LABEL: insertelement_v4i1_1
194; MIPS32: lw [[ELT:.*]],16(sp)
195; MIPS32: move v1,[[ELT]]
196; MIPS32: move v0,a0
197; MIPS32: move a0,a2
198; MIPS32: move a1,a3
199}
200
; Truncates %elt.arg to i1 and inserts it into element 1 of the <8 x i1> %vec,
; returning the updated vector.
201define internal <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) {
202entry:
203  %elt = trunc i32 %elt.arg to i1
204  %res = insertelement <8 x i1> %vec, i1 %elt, i32 1
205  ret <8 x i1> %res
; Both x86 configurations (default and -mattr=sse4.1) expect a pinsrw.
206; CHECK-LABEL: insertelement_v8i1
207; CHECK: pinsrw
208
209; SSE41-LABEL: insertelement_v8i1
210; SSE41: pinsrw
211
; *** Same instruction sequence as is expected for insertelement_v8i16: the
; *** loaded word is shifted left by 16, the upper 16 bits of a0 are cleared by
; *** the sll/srl pair, and the two are OR-ed into v0.
212; MIPS32-LABEL: insertelement_v8i1
213; MIPS32: lw [[ELT:.*]],16(sp)
214; MIPS32: sll [[ELT]],[[ELT]],0x10
215; MIPS32: sll a0,a0,0x10
216; MIPS32: srl a0,a0,0x10
217; MIPS32: or v0,[[ELT]],a0
218; MIPS32: move v1,a1
219; MIPS32: move a0,a2
220; MIPS32: move a1,a3
221}
222
; Truncates %elt.arg to i1 and inserts it into element 1 of the <16 x i1>
; %vec, returning the updated vector.
223define internal <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) {
224entry:
225  %elt = trunc i32 %elt.arg to i1
226  %res = insertelement <16 x i1> %vec, i1 %elt, i32 1
227  ret <16 x i1> %res
; Default x86 runs expect a movups, then a lea, then a mov.
228; CHECK-LABEL: insertelement_v16i1
229; CHECK: movups
230; CHECK: lea
231; CHECK: mov
232
; Runs with -mattr=sse4.1 expect a pinsrb instead.
233; SSE41-LABEL: insertelement_v16i1
234; SSE41: pinsrb
235
; *** The word loaded from 16(sp) is masked to 8 bits and shifted left by 8.
; *** lui 0xffff followed by ori 0xff builds the mask 0xffff00ff, which clears
; *** bits 8..15 of a0 before the two values are OR-ed into v0.
236; MIPS32-LABEL: insertelement_v16i1
237; MIPS32: lw [[ELT:.*]],16(sp)
238; MIPS32: andi [[ELT]],[[ELT]],0xff
239; MIPS32: sll [[ELT]],[[ELT]],0x8
240; MIPS32: lui [[T:.*]],0xffff
241; MIPS32: ori [[T]],[[T]],0xff
242; MIPS32: and a0,a0,[[T]]
243; MIPS32: or v0,[[ELT]],a0
244; MIPS32: move v1,a1
245; MIPS32: move a0,a2
246; MIPS32: move a1,a3
247}
248
249; extractelement operations
250
; Extracts element 1 of the <4 x float> %vec and returns it as a float.
251define internal float @extractelement_v4f32(<4 x float> %vec) {
252entry:
253  %res = extractelement <4 x float> %vec, i32 1
254  ret float %res
; Both x86 configurations (default and -mattr=sse4.1) expect a pshufd.
255; CHECK-LABEL: extractelement_v4f32
256; CHECK: pshufd
257
258; SSE41-LABEL: extractelement_v4f32
259; SSE41: pshufd
260
; *** a1 is moved into the FP register $f0 with mtc1.
261; MIPS32-LABEL: extractelement_v4f32
262; MIPS32: mtc1 a1,$f0
263}
264
; Extracts element 1 of the <4 x i32> %vec and returns it as an i32.
265define internal i32 @extractelement_v4i32(<4 x i32> %vec) {
266entry:
267  %res = extractelement <4 x i32> %vec, i32 1
268  ret i32 %res
; Default x86 runs expect a pshufd followed by a movd out of an xmm register.
269; CHECK-LABEL: extractelement_v4i32
270; CHECK: pshufd
271; CHECK: movd {{.*}},xmm
272
; Runs with -mattr=sse4.1 expect a pextrd instead.
273; SSE41-LABEL: extractelement_v4i32
274; SSE41: pextrd
275
; *** a1 is copied into v0. The directive below previously had a mistyped
; *** prefix, so FileCheck silently skipped it.
276; MIPS32-LABEL: extractelement_v4i32
277; MIPS32: move v0,a1
278}
279
; Extracts element 1 of the <8 x i16> %vec and zero-extends it to i32.
280define internal i32 @extractelement_v8i16(<8 x i16> %vec) {
281entry:
282  %res = extractelement <8 x i16> %vec, i32 1
283  %res.ext = zext i16 %res to i32
284  ret i32 %res.ext
; Both x86 configurations (default and -mattr=sse4.1) expect a pextrw.
285; CHECK-LABEL: extractelement_v8i16
286; CHECK: pextrw
287
288; SSE41-LABEL: extractelement_v8i16
289; SSE41: pextrw
290
; *** a0 is shifted right by 16, masked to 16 bits, and copied into v0.
291; MIPS32-LABEL: extractelement_v8i16
292; MIPS32: srl a0,a0,0x10
293; MIPS32: andi a0,a0,0xffff
294; MIPS32: move v0,a0
295}
296
; Extracts element 1 of the <16 x i8> %vec and zero-extends it to i32.
297define internal i32 @extractelement_v16i8(<16 x i8> %vec) {
298entry:
299  %res = extractelement <16 x i8> %vec, i32 1
300  %res.ext = zext i8 %res to i32
301  ret i32 %res.ext
; Default x86 runs expect a movups, then a lea, then a mov.
302; CHECK-LABEL: extractelement_v16i8
303; CHECK: movups
304; CHECK: lea
305; CHECK: mov
306
; Runs with -mattr=sse4.1 expect a pextrb instead.
307; SSE41-LABEL: extractelement_v16i8
308; SSE41: pextrb
309
; *** a0 is shifted right by 8 and masked to 8 bits; the identical mask is
; *** expected a second time before the copy into v0.
310; MIPS32-LABEL: extractelement_v16i8
311; MIPS32: srl a0,a0,0x8
312; MIPS32: andi a0,a0,0xff
313; MIPS32: andi a0,a0,0xff
314; MIPS32: move v0,a0
315}
316
; Extracts element 1 of the <4 x i1> %vec and zero-extends it to i32.
317define internal i32 @extractelement_v4i1(<4 x i1> %vec) {
318entry:
319  %res = extractelement <4 x i1> %vec, i32 1
320  %res.ext = zext i1 %res to i32
321  ret i32 %res.ext
; Default x86 runs expect a pshufd.
322; CHECK-LABEL: extractelement_v4i1
323; CHECK: pshufd
324
; Runs with -mattr=sse4.1 expect a pextrd instead.
325; SSE41-LABEL: extractelement_v4i1
326; SSE41: pextrd
327
; *** a1 is masked to its low bit (the mask is expected twice) and then copied
; *** into v0.
328; MIPS32-LABEL: extractelement_v4i1
329; MIPS32: andi a1,a1,0x1
330; MIPS32: andi a1,a1,0x1
331; MIPS32: move v0,a1
332}
333
; Extracts element 1 of the <8 x i1> %vec and zero-extends it to i32.
334define internal i32 @extractelement_v8i1(<8 x i1> %vec) {
335entry:
336  %res = extractelement <8 x i1> %vec, i32 1
337  %res.ext = zext i1 %res to i32
338  ret i32 %res.ext
; Both x86 configurations (default and -mattr=sse4.1) expect a pextrw.
339; CHECK-LABEL: extractelement_v8i1
340; CHECK: pextrw
341
342; SSE41-LABEL: extractelement_v8i1
343; SSE41: pextrw
344
; *** a0 is shifted right by 16, masked to its low bit (the mask is expected
; *** twice), and copied into v0.
345; MIPS32-LABEL: extractelement_v8i1
346; MIPS32: srl a0,a0,0x10
347; MIPS32: andi a0,a0,0x1
348; MIPS32: andi a0,a0,0x1
349; MIPS32: move v0,a0
350}
351
; Extracts element 1 of the <16 x i1> %vec and zero-extends it to i32.
352define internal i32 @extractelement_v16i1(<16 x i1> %vec) {
353entry:
354  %res = extractelement <16 x i1> %vec, i32 1
355  %res.ext = zext i1 %res to i32
356  ret i32 %res.ext
; Default x86 runs expect a movups, then a lea, then a mov.
357; CHECK-LABEL: extractelement_v16i1
358; CHECK: movups
359; CHECK: lea
360; CHECK: mov
361
; Runs with -mattr=sse4.1 expect a pextrb instead.
362; SSE41-LABEL: extractelement_v16i1
363; SSE41: pextrb
364
; *** a0 is shifted right by 8, masked to 8 bits, then masked to its low bit
; *** (that mask is expected twice) and copied into v0.
365; MIPS32-LABEL: extractelement_v16i1
366; MIPS32: srl a0,a0,0x8
367; MIPS32: andi a0,a0,0xff
368; MIPS32: andi a0,a0,0x1
369; MIPS32: andi a0,a0,0x1
370; MIPS32: move v0,a0
371}
372