; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; The -mtriple option on the run line above takes precedence over the module
; triple declared below, so code is generated for x86_64-apple-darwin even
; though the module itself names x86_64-unknown-linux-gnu.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; f_fu(ret, aa, b): 16-lane computation stored through %ret.
;   bi  = fptosi(b), broadcast to all 16 lanes
;   sel = odd lanes: the constants 3, 5, ..., 17; even lanes: fptosi(aa[lane])
;   ret[lane] = sitofp((bi / 2 + bi) + sel[lane])
; The scalar conversion and broadcast of %b appear twice in the IR. The
; duplication is kept as-is because the expected output was generated from
; exactly this input (presumably it is part of the original reproducer).
define void @f_fu(float* %ret, float*  %aa, float %b) {
; CHECK-LABEL: f_fu:
; CHECK:       ## BB#0: ## %allocas
; CHECK-NEXT:    vcvttss2si %xmm0, %eax
; CHECK-NEXT:    vpbroadcastd %eax, %zmm0
; CHECK-NEXT:    vcvttps2dq (%rsi), %zmm1
; CHECK-NEXT:    vpsrld $31, %zmm0, %zmm2
; CHECK-NEXT:    vpaddd %zmm2, %zmm0, %zmm2
; CHECK-NEXT:    vpsrad $1, %zmm2, %zmm2
; CHECK-NEXT:    movw $-21846, %ax ## imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd {{.*}}(%rip), %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
allocas:
  ; Load 16 floats from %aa; only 4-byte alignment is promised.
  %ptr_cast_for_load = bitcast float* %aa to <16 x float>*
  %ptr_masked_load.39 = load <16 x float>, <16 x float>* %ptr_cast_for_load, align 4

  ; First scalar-to-int conversion of %b, splatted to all lanes (lane 0 is
  ; inserted, then a zero shuffle mask replicates it).
  %b_load_to_int32 = fptosi float %b to i32
  %b_load_to_int32_broadcast_init = insertelement <16 x i32> undef, i32 %b_load_to_int32, i32 0
  %b_load_to_int32_broadcast = shufflevector <16 x i32> %b_load_to_int32_broadcast_init, <16 x i32> undef, <16 x i32> zeroinitializer

  ; Second, identical conversion and splat of %b; this copy feeds the sdiv.
  %b_to_int32 = fptosi float %b to i32
  %b_to_int32_broadcast_init = insertelement <16 x i32> undef, i32 %b_to_int32, i32 0
  %b_to_int32_broadcast = shufflevector <16 x i32> %b_to_int32_broadcast_init, <16 x i32> undef, <16 x i32> zeroinitializer

  ; Convert the loaded floats to i32 and compute the signed division by two.
  %a_load_to_int32 = fptosi <16 x float> %ptr_masked_load.39 to <16 x i32>
  %div_v019_load_ = sdiv <16 x i32> %b_to_int32_broadcast, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>

  ; Constant mask: true in odd lanes picks the constant vector there, false in
  ; even lanes keeps the converted loaded value. The expected output loads this
  ; mask as 0xAAAA into %k1.
  %v1.i = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>, <16 x i32> %a_load_to_int32

  ; (b / 2) + b, using the first broadcast copy as the addend.
  %foo_test = add <16 x i32> %div_v019_load_, %b_load_to_int32_broadcast

  ; Add the blended vector to the running sum.
  %add_struct_offset_y_struct_offset33_x = add <16 x i32> %foo_test, %v1.i

  ; Convert back to float and store all 16 lanes through %ret (align 4).
  %val = sitofp <16 x i32> %add_struct_offset_y_struct_offset33_x to <16 x float>
  %ptrcast = bitcast float* %ret to <16 x float>*
  store <16 x float> %val, <16 x float>* %ptrcast, align 4
  ret void
}
