; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; DemandedBits - MOVMSK zeros the upper bits of the result.
;

define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %2 = and i32 %1, 15
  ret i32 %2
}

define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %2 = and i32 %1, 3
  ret i32 %2
}

define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %2 = and i32 %1, 65535
  ret i32 %2
}

define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %2 = and i32 %1, 15
  ret i32 %2
}

; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
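
; In each test above the 'and' mask covers exactly the bits the intrinsic can
; set (one sign bit per input lane: 4 lanes -> mask 15, 16 lanes -> mask 65535),
; so DemandedBits proves the mask redundant and instcombine removes it.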

;
; DemandedBits - If we don't use the lower bits then we just return zero.
;

define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
  %2 = and i32 %1, -256
  ret i32 %2
}

define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %2 = and i32 %1, -16
  ret i32 %2
}

define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %2 = and i32 %1, -4
  ret i32 %2
}

define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %2 = and i32 %1, -65536
  ret i32 %2
}

define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %2 = and i32 %1, -256
  ret i32 %2
}

define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %2 = and i32 %1, -16
  ret i32 %2
}

; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.

;
; Constant Folding (UNDEF -> ZERO)
;

define i32 @undef_x86_mmx_pmovmskb() {
; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
  ret i32 %1
}

define i32 @undef_x86_sse_movmsk_ps() {
; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
  ret i32 %1
}

define i32 @undef_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
  ret i32 %1
}

define i32 @undef_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
  ret i32 %1
}

define i32 @undef_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
  ret i32 %1
}

define i32 @undef_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
  ret i32 %1
}

define i32 @undef_x86_avx2_pmovmskb() {
; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
  ret i32 %1
}

;
; Constant Folding (ZERO -> ZERO)
;
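
; Note: the x86_mmx case below is not folded to a constant - its operand is an
; x86_mmx bitcast constant expression rather than a plain vector constant, so
; the call is left in place (see its CHECK lines).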

define i32 @zero_x86_mmx_pmovmskb() {
; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = bitcast <1 x i64> zeroinitializer to x86_mmx
  %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
  ret i32 %2
}

define i32 @zero_x86_sse_movmsk_ps() {
; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_avx2_pmovmskb() {
; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
  ret i32 %1
}

;
; Constant Folding
;

define i32 @fold_x86_mmx_pmovmskb() {
; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx
  %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
  ret i32 %2
}

define i32 @fold_x86_sse_movmsk_ps() {
; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 10
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
  ret i32 %1
}

define i32 @fold_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 2
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
  ret i32 %1
}

define i32 @fold_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 5654
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
  ret i32 %1
}

define i32 @fold_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 170
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
  ret i32 %1
}

define i32 @fold_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 10
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
  ret i32 %1
}
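
; The folded results above concatenate each lane's sign bit (bit 0 = lane 0),
; e.g. <float 1.0, -1.0, 100.0, -200.0> -> 0b1010 = 10. In the avx2 test below,
; the undef lanes fold as if their sign bit were clear.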

define i32 @fold_x86_avx2_pmovmskb() {
; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 370546176
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
  ret i32 %1
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)