; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; InstCombine tests for x86 XOP intrinsics.
;
; vfrcz.sd / vfrcz.ss only compute lane 0; the CHECK lines verify that
; InstCombine deletes the dead inserts into the upper lane(s) when lane 0
; is extracted, and folds an extract of an upper lane straight to the
; constant that was inserted there (the intrinsic call itself then dies).

define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT:    ret double [[TMP3]]
;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 0
  ret double %4
}

define double @test_vfrcz_sd_1(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 1
  ret double %4
}

define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

define float @test_vfrcz_ss_3(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 3
  ret float %6
}

; The XOP vpcom* comparison intrinsics are lowered by InstCombine to a
; generic icmp (with the matching signed/unsigned predicate) followed by a
; sign-extension back to the original vector type.

define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

; The "true"/"false" vpcom variants compare nothing; InstCombine folds them
; directly to an all-ones or all-zeros constant vector.

define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8(
; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8(
; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

; Intrinsic declarations. More variants are declared than are currently
; exercised by the tests above.

declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone