; RUN: opt < %s -instcombine -S | FileCheck %s

declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; A variable immediate byte is not expected in practice. The pass must not
; crash on it and the call has to survive unchanged.

; CHECK-LABEL: @insertps_non_const_imm
; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ret <4 x float> %r
}

; When the low four bits of the immediate (the zero mask) are all set, every
; lane is cleared, so the result is a zero vector whatever the upper control
; bits say (0x0f and 0xff below).

; CHECK-LABEL: @insertps_0x0f
; CHECK-NEXT: ret <4 x float> zeroinitializer
define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %r
}

; CHECK-LABEL: @insertps_0xff
; CHECK-NEXT: ret <4 x float> zeroinitializer
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %r
}

; Some zero mask bits are set (lanes 2 and 3 for 0x0c) but none of them covers
; the destination lane of the insertion. The call is expected to be left as is...

; CHECK-LABEL: @insertps_0x0c
; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %r
}

; ...unless both input vectors are the same operand.
; With %v1 supplied for both operands, the expected result is a shuffle of %v1
; against a constant vector holding the zeroed lanes.
; 0x15: source lane 0, destination lane 1, zero mask 0b0101 (lanes 0 and 2).

; CHECK-LABEL: @insertps_0x15_single_input
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %r
}

; 0x1a: destination lane 1 is also named in the zero mask (0b1010), so the
; zeroing wins over the insertion and lanes 1 and 3 end up as zero.

; CHECK-LABEL: @insertps_0x1a_single_input
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %r
}

; 0xc1: the zero mask (0b0001) clears destination lane 0, so the inserted
; element is discarded and %v2 does not appear in the expected output at all.

; CHECK-LABEL: @insertps_0xc1
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %r
}

; If no zero mask bits are set, convert to a shuffle.
; Zero mask is empty in every case below: the call turns into a two-input
; shuffle. Mask indices 0-3 pick lanes of %v1, indices 4-7 pick lanes of %v2.

; 0x00: %v2 lane 0 goes to result lane 0.
; CHECK-LABEL: @insertps_0x00
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %r
}

; 0x10: %v2 lane 0 goes to result lane 1.
; CHECK-LABEL: @insertps_0x10
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %r
}

; 0x20: %v2 lane 0 goes to result lane 2.
; CHECK-LABEL: @insertps_0x20
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %r
}

; 0x30: %v2 lane 0 goes to result lane 3.
; CHECK-LABEL: @insertps_0x30
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %r
}

; 0xc0: %v2 lane 3 goes to result lane 0.
; CHECK-LABEL: @insertps_0xc0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %r
}

; 0xd0: %v2 lane 3 goes to result lane 1.
; CHECK-LABEL: @insertps_0xd0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %r
}

; 0xe0: %v2 lane 3 goes to result lane 2.
; CHECK-LABEL: @insertps_0xe0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %r
}

; 0xf0: %v2 lane 3 goes to result lane 3.
; CHECK-LABEL: @insertps_0xf0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float>
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
  %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %r
}