; RUN: opt < %s -instcombine -S | FileCheck %s

; InstCombine tests for the SSE4.1 insertps intrinsic. The third operand is the
; 8-bit immediate control byte; most functions in this file are named after the
; immediate value they pass (for example, insertps_0x0f passes i8 15).
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
; The call is expected to survive unchanged and its result to be returned.

define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_non_const_imm(
; CHECK-NEXT:  [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; If all zero mask bits are set, return a zero regardless of the other control bits.

; imm 0x0f: only the four zero mask bits are set.
define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x0f(
; CHECK-NEXT:  ret <4 x float> zeroinitializer
}

; imm 0xff: every bit of the control byte is set, including all four zero mask
; bits, so the expected result is still an all-zero vector.
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xff(
; CHECK-NEXT:  ret <4 x float> zeroinitializer
}

; If some zero mask bits are set that do not override the insertion, we do not change anything.

; imm 0x0c: zero mask bits 2 and 3 are set while the insertion targets lane 0.
; The call is expected to be left as is and its result returned.
define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x0c(
; CHECK-NEXT:  [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; When both input vectors are the same operand, a call whose zero mask bits do
; not override the insertion is still converted: the result is a shuffle of the
; single input with a constant vector that supplies the zeros.

; imm 0x15: element 0 is inserted into lane 1; lanes 0 and 2 are zeroed.
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x15_single_input(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; The zero mask overrides the insertion lane.

; imm 0x1a: the insertion targets lane 1, but lanes 1 and 3 are zeroed, so the
; expected shuffle only keeps lanes 0 and 2 of the input.
define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x1a_single_input(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; The zero mask overrides the insertion lane, so the second input vector is not used.

; imm 0xc1: the insertion targets lane 0, which is also zeroed; %v2 does not
; appear in the expected output.
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xc1(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; If no zero mask bits are set, convert to a shuffle.

; imm 0x00: element 0 of %v2 goes to lane 0.
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x00(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0x10, no zero mask bits: element 0 of %v2 goes to lane 1.
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x10(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0x20, no zero mask bits: element 0 of %v2 goes to lane 2.
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x20(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0x30, no zero mask bits: element 0 of %v2 goes to lane 3.
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x30(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0xc0, no zero mask bits: element 3 of %v2 goes to lane 0.
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xc0(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0xd0, no zero mask bits: element 3 of %v2 goes to lane 1.
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xd0(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0xe0, no zero mask bits: element 3 of %v2 goes to lane 2.
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xe0(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}

; imm 0xf0, no zero mask bits: element 3 of %v2 goes to lane 3.
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xf0(
; CHECK-NEXT:  [[RES:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:  ret <4 x float> [[RES]]
}
