; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; InstCombine folds of the SSE4.1 insertps intrinsic when the control byte
; (third argument, imm8) is a constant.
;
; imm8 layout exercised by the tests in this file:
;   bits [7:6]  element of the second operand that is inserted
;   bits [5:4]  destination lane in the first operand
;   bits [3:0]  zero mask; a set bit N forces result lane N to 0.0
;
; In the expected shufflevector masks, indices 0-3 select from the first
; shuffle operand and indices 4-7 select from the second shuffle operand.
; Each function is named after the hexadecimal value of the imm8 it passes.

declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; If all zero mask bits are set, return a zero regardless of the other control bits.

; imm8 = 0x0f (decimal 15): source and destination fields are 0, zero mask is 0b1111.
; Expected: the call folds to zeroinitializer.
define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0f(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %res
}

; imm8 = 0xff (decimal 255): every control bit is set; the zero mask is still 0b1111.
; Expected: the call folds to zeroinitializer, same as the 0x0f case.
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xff(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %res
}

; If some zero mask bits are set that do not override the insertion, we do not change anything.

; imm8 = 0x0c (decimal 12): insertion targets lane 0, zero mask 0b1100 clears
; lanes 2 and 3 only, so the inserted lane survives.
; Expected: the intrinsic call is left in place with the same immediate.
define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0c(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], i8 12)
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %res
}

; ...unless both input vectors are the same operand.

; imm8 = 0x15 (decimal 21): source element 0, destination lane 1, zero mask
; 0b0101 clears lanes 0 and 2. Both vector arguments are %v1.
; Expected: a single shufflevector of %v1 against a constant vector that
; supplies the zeros, producing <0.0, v1[0], 0.0, v1[3]>.
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x15_single_input(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %res
}

; The zero mask overrides the insertion lane.
; imm8 = 0x1a (decimal 26): source element 0, destination lane 1, zero mask
; 0b1010 clears lanes 1 and 3. Lane 1 is both the insertion target and zeroed;
; the expected output shows the zero winning: <v1[0], 0.0, v1[2], 0.0>.
; Both vector arguments are %v1, so the result is a shuffle of %v1 with a
; constant vector that supplies the zeros.
define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x1a_single_input(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %res
}

; The zero mask overrides the insertion lane, so the second input vector is not used.

; imm8 = 0xc1 (decimal 193): source element 3, destination lane 0, zero mask
; 0b0001 clears lane 0. The inserted element is discarded, so the expected
; output only writes 0.0 into lane 0 of %v1 and never references %v2.
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc1(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[V1:%.*]], float 0.000000e+00, i32 0
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %res
}

; If no zero mask bits are set, convert to a shuffle.
;
; The next four tests keep the source element at 0 and step the destination
; lane through 0, 1, 2, 3 (imm8 = 0x00, 0x10, 0x20, 0x30).

; imm8 = 0x00: v2[0] goes to lane 0, giving <v2[0], v1[1], v1[2], v1[3]>.
; Note that the expected shuffle lists %v2 as its first operand here, so index
; 0 is v2[0] and indices 5-7 are v1[1..3].
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x00(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V2:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %res
}

; imm8 = 0x10 (decimal 16): v2[0] goes to lane 1, giving <v1[0], v2[0], v1[2], v1[3]>.
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x10(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %res
}

; imm8 = 0x20 (decimal 32): v2[0] goes to lane 2, giving <v1[0], v1[1], v2[0], v1[3]>.
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x20(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %res
}

; imm8 = 0x30 (decimal 48): v2[0] goes to lane 3, giving <v1[0], v1[1], v1[2], v2[0]>.
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x30(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %res
}

; The remaining four tests select source element 3 of %v2 (shuffle index 7)
; and step the destination lane through 0, 1, 2, 3 (imm8 = 0xc0, 0xd0, 0xe0, 0xf0).

; imm8 = 0xc0 (decimal 192): v2[3] goes to lane 0, giving <v2[3], v1[1], v1[2], v1[3]>.
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %res
}

; imm8 = 0xd0 (decimal 208): v2[3] goes to lane 1, giving <v1[0], v2[3], v1[2], v1[3]>.
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xd0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %res
}

; imm8 = 0xe0 (decimal 224): v2[3] goes to lane 2, giving <v1[0], v1[1], v2[3], v1[3]>.
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xe0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %res
}

; imm8 = 0xf0 (decimal 240): v2[3] goes to lane 3, giving <v1[0], v1[1], v1[2], v2[3]>.
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xf0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %res
}