; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; SSE4.1 INSERTPS intrinsic. Operands: destination vector, source vector, and
; an 8-bit control immediate. The tests in this file exercise the immediate as
; bits [7:6] = source element, bits [5:4] = destination lane, and
; bits [3:0] = per-lane zero mask.
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; If all zero mask bits are set, return a zero regardless of the other control bits.

; imm8 = 15 (0x0F): both selectors are 0 and all four zero-mask bits are set,
; so the call is expected to fold to an all-zero vector.
define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0f(
; CHECK-NEXT:    ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %res
}

; imm8 = 255 (0xFF): every control bit is set. The zero mask still covers all
; four lanes, so the selectors are irrelevant and the result is all zeros.
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xff(
; CHECK-NEXT:    ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %res
}

; If some zero mask bits are set that do not override the insertion, we do not change anything.

; imm8 = 12 (0x0C): the zero mask 0b1100 clears lanes 2 and 3, while element 0
; of %v2 is inserted into lane 0. The insertion survives and the operands are
; distinct, so the intrinsic call is expected to be left untouched.
define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0c(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], i8 12)
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %res
}

; ...unless both input vectors are the same operand.

; imm8 = 21 (0x15): source element 0, destination lane 1, zero mask 0b0101
; (lanes 0 and 2). Both vector operands are %v1, so the expected result is a
; single shuffle of %v1 with a zero vector: <0.0, %v1[0], 0.0, %v1[3]>.
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x15_single_input(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %res
}

; The zero mask overrides the insertion lane.

; imm8 = 26 (0x1A): source element 0, destination lane 1, zero mask 0b1010
; (lanes 1 and 3). The destination lane is itself zeroed, so the insertion
; disappears and the expected result is <%v1[0], 0.0, %v1[2], 0.0>.
define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x1a_single_input(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %res
}

; The zero mask overrides the insertion lane, so the second input vector is not used.

; imm8 = 193 (0xC1): source element 3, destination lane 0, zero mask 0b0001.
; Lane 0 is zeroed after the insertion, so %v2 never reaches the result; the
; expected output just writes 0.0 into lane 0 of %v1.
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc1(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[V1:%.*]], float 0.000000e+00, i32 0
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %res
}

; If no zero mask bits are set, convert to a shuffle.

; imm8 = 0 (0x00): element 0 of %v2 goes to lane 0, nothing is zeroed.
; The expected shuffle lists %v2 as its first operand, so the mask reads
; lane 0 from %v2[0] (index 0) and lanes 1-3 from %v1 (indices 5, 6, 7).
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x00(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V2:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %res
}

; imm8 = 16 (0x10): element 0 of %v2 goes to lane 1, nothing is zeroed.
; Expected result: <%v1[0], %v2[0], %v1[2], %v1[3]>.
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x10(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %res
}

; imm8 = 32 (0x20): element 0 of %v2 goes to lane 2, nothing is zeroed.
; Expected result: <%v1[0], %v1[1], %v2[0], %v1[3]>.
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x20(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %res
}

; imm8 = 48 (0x30): element 0 of %v2 goes to lane 3, nothing is zeroed.
; Expected result: <%v1[0], %v1[1], %v1[2], %v2[0]>.
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x30(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %res
}

; imm8 = 192 (0xC0): element 3 of %v2 goes to lane 0, nothing is zeroed.
; Expected result: <%v2[3], %v1[1], %v1[2], %v1[3]>.
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %res
}

; imm8 = 208 (0xD0): element 3 of %v2 goes to lane 1, nothing is zeroed.
; Expected result: <%v1[0], %v2[3], %v1[2], %v1[3]>.
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xd0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %res
}

; imm8 = 224 (0xE0): element 3 of %v2 goes to lane 2, nothing is zeroed.
; Expected result: <%v1[0], %v1[1], %v2[3], %v1[3]>.
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xe0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %res
}

; imm8 = 240 (0xF0): element 3 of %v2 goes to lane 3, nothing is zeroed.
; Expected result: <%v1[0], %v1[1], %v1[2], %v2[3]>.
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xf0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %res
}
