1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \ 3; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C 4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \ 5; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF 6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \ 7; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C 8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \ 9; RUN: | FileCheck %s -check-prefixes=CHECK-I686 10 11define void @test_load_store(half* %in, half* %out) #0 { 12; BWON-LABEL: test_load_store: 13; BWON: # %bb.0: 14; BWON-NEXT: movzwl (%rdi), %eax 15; BWON-NEXT: movw %ax, (%rsi) 16; BWON-NEXT: retq 17; 18; BWOFF-LABEL: test_load_store: 19; BWOFF: # %bb.0: 20; BWOFF-NEXT: movw (%rdi), %ax 21; BWOFF-NEXT: movw %ax, (%rsi) 22; BWOFF-NEXT: retq 23; 24; CHECK-I686-LABEL: test_load_store: 25; CHECK-I686: # %bb.0: 26; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 27; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx 28; CHECK-I686-NEXT: movw (%ecx), %cx 29; CHECK-I686-NEXT: movw %cx, (%eax) 30; CHECK-I686-NEXT: retl 31 %val = load half, half* %in 32 store half %val, half* %out 33 ret void 34} 35 36define i16 @test_bitcast_from_half(half* %addr) #0 { 37; BWON-LABEL: test_bitcast_from_half: 38; BWON: # %bb.0: 39; BWON-NEXT: movzwl (%rdi), %eax 40; BWON-NEXT: retq 41; 42; BWOFF-LABEL: test_bitcast_from_half: 43; BWOFF: # %bb.0: 44; BWOFF-NEXT: movw (%rdi), %ax 45; BWOFF-NEXT: retq 46; 47; CHECK-I686-LABEL: test_bitcast_from_half: 48; CHECK-I686: # %bb.0: 49; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 50; CHECK-I686-NEXT: movw (%eax), %ax 51; CHECK-I686-NEXT: retl 52 %val = load half, half* %addr 53 %val_int = bitcast half %val to i16 54 ret i16 %val_int 55} 56 57define void 
@test_bitcast_to_half(half* %addr, i16 %in) #0 { 58; CHECK-LABEL: test_bitcast_to_half: 59; CHECK: # %bb.0: 60; CHECK-NEXT: movw %si, (%rdi) 61; CHECK-NEXT: retq 62; 63; CHECK-I686-LABEL: test_bitcast_to_half: 64; CHECK-I686: # %bb.0: 65; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax 66; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx 67; CHECK-I686-NEXT: movw %ax, (%ecx) 68; CHECK-I686-NEXT: retl 69 %val_fp = bitcast i16 %in to half 70 store half %val_fp, half* %addr 71 ret void 72} 73 74define float @test_extend32(half* %addr) #0 { 75; CHECK-LIBCALL-LABEL: test_extend32: 76; CHECK-LIBCALL: # %bb.0: 77; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi 78; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL 79; 80; BWON-F16C-LABEL: test_extend32: 81; BWON-F16C: # %bb.0: 82; BWON-F16C-NEXT: movzwl (%rdi), %eax 83; BWON-F16C-NEXT: vmovd %eax, %xmm0 84; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 85; BWON-F16C-NEXT: retq 86; 87; CHECK-I686-LABEL: test_extend32: 88; CHECK-I686: # %bb.0: 89; CHECK-I686-NEXT: subl $12, %esp 90; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 91; CHECK-I686-NEXT: movzwl (%eax), %eax 92; CHECK-I686-NEXT: movl %eax, (%esp) 93; CHECK-I686-NEXT: calll __gnu_h2f_ieee 94; CHECK-I686-NEXT: addl $12, %esp 95; CHECK-I686-NEXT: retl 96 %val16 = load half, half* %addr 97 %val32 = fpext half %val16 to float 98 ret float %val32 99} 100 101define double @test_extend64(half* %addr) #0 { 102; CHECK-LIBCALL-LABEL: test_extend64: 103; CHECK-LIBCALL: # %bb.0: 104; CHECK-LIBCALL-NEXT: pushq %rax 105; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi 106; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 107; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 108; CHECK-LIBCALL-NEXT: popq %rax 109; CHECK-LIBCALL-NEXT: retq 110; 111; BWON-F16C-LABEL: test_extend64: 112; BWON-F16C: # %bb.0: 113; BWON-F16C-NEXT: movzwl (%rdi), %eax 114; BWON-F16C-NEXT: vmovd %eax, %xmm0 115; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 116; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 117; BWON-F16C-NEXT: retq 118; 119; 
CHECK-I686-LABEL: test_extend64: 120; CHECK-I686: # %bb.0: 121; CHECK-I686-NEXT: subl $12, %esp 122; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 123; CHECK-I686-NEXT: movzwl (%eax), %eax 124; CHECK-I686-NEXT: movl %eax, (%esp) 125; CHECK-I686-NEXT: calll __gnu_h2f_ieee 126; CHECK-I686-NEXT: addl $12, %esp 127; CHECK-I686-NEXT: retl 128 %val16 = load half, half* %addr 129 %val32 = fpext half %val16 to double 130 ret double %val32 131} 132 133define void @test_trunc32(float %in, half* %addr) #0 { 134; CHECK-LIBCALL-LABEL: test_trunc32: 135; CHECK-LIBCALL: # %bb.0: 136; CHECK-LIBCALL-NEXT: pushq %rbx 137; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 138; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee 139; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 140; CHECK-LIBCALL-NEXT: popq %rbx 141; CHECK-LIBCALL-NEXT: retq 142; 143; BWON-F16C-LABEL: test_trunc32: 144; BWON-F16C: # %bb.0: 145; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 146; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rdi) 147; BWON-F16C-NEXT: retq 148; 149; CHECK-I686-LABEL: test_trunc32: 150; CHECK-I686: # %bb.0: 151; CHECK-I686-NEXT: pushl %esi 152; CHECK-I686-NEXT: subl $8, %esp 153; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 154; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 155; CHECK-I686-NEXT: movss %xmm0, (%esp) 156; CHECK-I686-NEXT: calll __gnu_f2h_ieee 157; CHECK-I686-NEXT: movw %ax, (%esi) 158; CHECK-I686-NEXT: addl $8, %esp 159; CHECK-I686-NEXT: popl %esi 160; CHECK-I686-NEXT: retl 161 %val16 = fptrunc float %in to half 162 store half %val16, half* %addr 163 ret void 164} 165 166define void @test_trunc64(double %in, half* %addr) #0 { 167; CHECK-LABEL: test_trunc64: 168; CHECK: # %bb.0: 169; CHECK-NEXT: pushq %rbx 170; CHECK-NEXT: movq %rdi, %rbx 171; CHECK-NEXT: callq __truncdfhf2 172; CHECK-NEXT: movw %ax, (%rbx) 173; CHECK-NEXT: popq %rbx 174; CHECK-NEXT: retq 175; 176; CHECK-I686-LABEL: test_trunc64: 177; CHECK-I686: # %bb.0: 178; CHECK-I686-NEXT: pushl %esi 179; CHECK-I686-NEXT: subl $8, %esp 180; 
CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 181; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 182; CHECK-I686-NEXT: movsd %xmm0, (%esp) 183; CHECK-I686-NEXT: calll __truncdfhf2 184; CHECK-I686-NEXT: movw %ax, (%esi) 185; CHECK-I686-NEXT: addl $8, %esp 186; CHECK-I686-NEXT: popl %esi 187; CHECK-I686-NEXT: retl 188 %val16 = fptrunc double %in to half 189 store half %val16, half* %addr 190 ret void 191} 192 193define i64 @test_fptosi_i64(half* %p) #0 { 194; CHECK-LIBCALL-LABEL: test_fptosi_i64: 195; CHECK-LIBCALL: # %bb.0: 196; CHECK-LIBCALL-NEXT: pushq %rax 197; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi 198; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 199; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax 200; CHECK-LIBCALL-NEXT: popq %rcx 201; CHECK-LIBCALL-NEXT: retq 202; 203; BWON-F16C-LABEL: test_fptosi_i64: 204; BWON-F16C: # %bb.0: 205; BWON-F16C-NEXT: movzwl (%rdi), %eax 206; BWON-F16C-NEXT: vmovd %eax, %xmm0 207; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 208; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax 209; BWON-F16C-NEXT: retq 210; 211; CHECK-I686-LABEL: test_fptosi_i64: 212; CHECK-I686: # %bb.0: 213; CHECK-I686-NEXT: subl $12, %esp 214; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 215; CHECK-I686-NEXT: movzwl (%eax), %eax 216; CHECK-I686-NEXT: movl %eax, (%esp) 217; CHECK-I686-NEXT: calll __gnu_h2f_ieee 218; CHECK-I686-NEXT: fstps (%esp) 219; CHECK-I686-NEXT: calll __fixsfdi 220; CHECK-I686-NEXT: addl $12, %esp 221; CHECK-I686-NEXT: retl 222 %a = load half, half* %p, align 2 223 %r = fptosi half %a to i64 224 ret i64 %r 225} 226 227define void @test_sitofp_i64(i64 %a, half* %p) #0 { 228; CHECK-LIBCALL-LABEL: test_sitofp_i64: 229; CHECK-LIBCALL: # %bb.0: 230; CHECK-LIBCALL-NEXT: pushq %rbx 231; CHECK-LIBCALL-NEXT: movq %rsi, %rbx 232; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 233; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee 234; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 235; CHECK-LIBCALL-NEXT: popq %rbx 236; CHECK-LIBCALL-NEXT: retq 237; 238; BWON-F16C-LABEL: test_sitofp_i64: 239; 
BWON-F16C: # %bb.0: 240; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 241; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 242; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi) 243; BWON-F16C-NEXT: retq 244; 245; CHECK-I686-LABEL: test_sitofp_i64: 246; CHECK-I686: # %bb.0: 247; CHECK-I686-NEXT: pushl %esi 248; CHECK-I686-NEXT: subl $24, %esp 249; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 250; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 251; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 252; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) 253; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 254; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 255; CHECK-I686-NEXT: movss %xmm0, (%esp) 256; CHECK-I686-NEXT: calll __gnu_f2h_ieee 257; CHECK-I686-NEXT: movw %ax, (%esi) 258; CHECK-I686-NEXT: addl $24, %esp 259; CHECK-I686-NEXT: popl %esi 260; CHECK-I686-NEXT: retl 261 %r = sitofp i64 %a to half 262 store half %r, half* %p 263 ret void 264} 265 266define i64 @test_fptoui_i64(half* %p) #0 { 267; CHECK-LIBCALL-LABEL: test_fptoui_i64: 268; CHECK-LIBCALL: # %bb.0: 269; CHECK-LIBCALL-NEXT: pushq %rax 270; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi 271; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 272; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 273; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2 274; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm2 275; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rax 276; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 277; CHECK-LIBCALL-NEXT: xorq %rax, %rcx 278; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax 279; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 280; CHECK-LIBCALL-NEXT: cmovaeq %rcx, %rax 281; CHECK-LIBCALL-NEXT: popq %rcx 282; CHECK-LIBCALL-NEXT: retq 283; 284; BWON-F16C-LABEL: test_fptoui_i64: 285; BWON-F16C: # %bb.0: 286; BWON-F16C-NEXT: movzwl (%rdi), %eax 287; BWON-F16C-NEXT: vmovd %eax, %xmm0 288; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 289; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 290; 
BWON-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm2 291; BWON-F16C-NEXT: vcvttss2si %xmm2, %rax 292; BWON-F16C-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 293; BWON-F16C-NEXT: xorq %rax, %rcx 294; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax 295; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 296; BWON-F16C-NEXT: cmovaeq %rcx, %rax 297; BWON-F16C-NEXT: retq 298; 299; CHECK-I686-LABEL: test_fptoui_i64: 300; CHECK-I686: # %bb.0: 301; CHECK-I686-NEXT: subl $12, %esp 302; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 303; CHECK-I686-NEXT: movzwl (%eax), %eax 304; CHECK-I686-NEXT: movl %eax, (%esp) 305; CHECK-I686-NEXT: calll __gnu_h2f_ieee 306; CHECK-I686-NEXT: fstps (%esp) 307; CHECK-I686-NEXT: calll __fixunssfdi 308; CHECK-I686-NEXT: addl $12, %esp 309; CHECK-I686-NEXT: retl 310 %a = load half, half* %p, align 2 311 %r = fptoui half %a to i64 312 ret i64 %r 313} 314 315define void @test_uitofp_i64(i64 %a, half* %p) #0 { 316; CHECK-LIBCALL-LABEL: test_uitofp_i64: 317; CHECK-LIBCALL: # %bb.0: 318; CHECK-LIBCALL-NEXT: pushq %rbx 319; CHECK-LIBCALL-NEXT: movq %rsi, %rbx 320; CHECK-LIBCALL-NEXT: testq %rdi, %rdi 321; CHECK-LIBCALL-NEXT: js .LBB10_1 322; CHECK-LIBCALL-NEXT: # %bb.2: 323; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 324; CHECK-LIBCALL-NEXT: jmp .LBB10_3 325; CHECK-LIBCALL-NEXT: .LBB10_1: 326; CHECK-LIBCALL-NEXT: movq %rdi, %rax 327; CHECK-LIBCALL-NEXT: shrq %rax 328; CHECK-LIBCALL-NEXT: andl $1, %edi 329; CHECK-LIBCALL-NEXT: orq %rax, %rdi 330; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 331; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0 332; CHECK-LIBCALL-NEXT: .LBB10_3: 333; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee 334; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 335; CHECK-LIBCALL-NEXT: popq %rbx 336; CHECK-LIBCALL-NEXT: retq 337; 338; BWON-F16C-LABEL: test_uitofp_i64: 339; BWON-F16C: # %bb.0: 340; BWON-F16C-NEXT: testq %rdi, %rdi 341; BWON-F16C-NEXT: js .LBB10_1 342; BWON-F16C-NEXT: # %bb.2: 343; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 344; BWON-F16C-NEXT: jmp .LBB10_3 
345; BWON-F16C-NEXT: .LBB10_1: 346; BWON-F16C-NEXT: movq %rdi, %rax 347; BWON-F16C-NEXT: shrq %rax 348; BWON-F16C-NEXT: andl $1, %edi 349; BWON-F16C-NEXT: orq %rax, %rdi 350; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 351; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0 352; BWON-F16C-NEXT: .LBB10_3: 353; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 354; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi) 355; BWON-F16C-NEXT: retq 356; 357; CHECK-I686-LABEL: test_uitofp_i64: 358; CHECK-I686: # %bb.0: 359; CHECK-I686-NEXT: pushl %esi 360; CHECK-I686-NEXT: subl $24, %esp 361; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 362; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 363; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 364; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 365; CHECK-I686-NEXT: shrl $31, %eax 366; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) 367; CHECK-I686-NEXT: fadds {{\.LCPI.*}}(,%eax,4) 368; CHECK-I686-NEXT: fstps (%esp) 369; CHECK-I686-NEXT: calll __gnu_f2h_ieee 370; CHECK-I686-NEXT: movw %ax, (%esi) 371; CHECK-I686-NEXT: addl $24, %esp 372; CHECK-I686-NEXT: popl %esi 373; CHECK-I686-NEXT: retl 374 %r = uitofp i64 %a to half 375 store half %r, half* %p 376 ret void 377} 378 379define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { 380; CHECK-LIBCALL-LABEL: test_extend32_vec4: 381; CHECK-LIBCALL: # %bb.0: 382; CHECK-LIBCALL-NEXT: subq $88, %rsp 383; CHECK-LIBCALL-NEXT: movl (%rdi), %eax 384; CHECK-LIBCALL-NEXT: movl 4(%rdi), %ecx 385; CHECK-LIBCALL-NEXT: movl %eax, (%rsp) 386; CHECK-LIBCALL-NEXT: movl %ecx, {{[0-9]+}}(%rsp) 387; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 388; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 389; CHECK-LIBCALL-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0 390; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 391; CHECK-LIBCALL-NEXT: pextrw $1, %xmm0, %edi 392; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 393; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 394; 
CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 395; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %edi 396; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 397; CHECK-LIBCALL-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 398; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 399; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 400; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 401; CHECK-LIBCALL-NEXT: pextrw $1, %xmm0, %edi 402; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 403; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 404; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 405; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %edi 406; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 407; CHECK-LIBCALL-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 408; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 409; CHECK-LIBCALL-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 410; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0] 411; CHECK-LIBCALL-NEXT: addq $88, %rsp 412; CHECK-LIBCALL-NEXT: retq 413; 414; BWON-F16C-LABEL: test_extend32_vec4: 415; BWON-F16C: # %bb.0: 416; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0 417; BWON-F16C-NEXT: retq 418; 419; CHECK-I686-LABEL: test_extend32_vec4: 420; CHECK-I686: # %bb.0: 421; CHECK-I686-NEXT: subl $124, %esp 422; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 423; CHECK-I686-NEXT: movl (%eax), %ecx 424; CHECK-I686-NEXT: movl 4(%eax), %eax 425; CHECK-I686-NEXT: movl %eax, {{[0-9]+}}(%esp) 426; CHECK-I686-NEXT: movl %ecx, {{[0-9]+}}(%esp) 427; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 428; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 429; CHECK-I686-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0 430; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 431; CHECK-I686-NEXT: pextrw $1, %xmm0, %eax 432; 
CHECK-I686-NEXT: movl %eax, (%esp) 433; CHECK-I686-NEXT: calll __gnu_h2f_ieee 434; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 435; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 436; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 437; CHECK-I686-NEXT: movl %eax, (%esp) 438; CHECK-I686-NEXT: calll __gnu_h2f_ieee 439; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 440; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 441; CHECK-I686-NEXT: pextrw $1, %xmm0, %eax 442; CHECK-I686-NEXT: movl %eax, (%esp) 443; CHECK-I686-NEXT: calll __gnu_h2f_ieee 444; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 445; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 446; CHECK-I686-NEXT: movl %eax, (%esp) 447; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 448; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 449; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 450; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 451; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 452; CHECK-I686-NEXT: calll __gnu_h2f_ieee 453; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 454; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 455; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 456; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 457; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 458; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 459; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 460; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 461; CHECK-I686-NEXT: addl $124, %esp 462; CHECK-I686-NEXT: retl 463 %a = load <4 x half>, <4 x half>* %p, align 8 464 %b = fpext <4 x half> %a to <4 x float> 465 ret <4 x float> %b 466} 467 468define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { 469; CHECK-LIBCALL-LABEL: test_extend64_vec4: 470; CHECK-LIBCALL: # %bb.0: 471; 
CHECK-LIBCALL-NEXT: pushq %rbp 472; CHECK-LIBCALL-NEXT: pushq %r14 473; CHECK-LIBCALL-NEXT: pushq %rbx 474; CHECK-LIBCALL-NEXT: subq $32, %rsp 475; CHECK-LIBCALL-NEXT: movzwl 4(%rdi), %r14d 476; CHECK-LIBCALL-NEXT: movzwl 6(%rdi), %ebp 477; CHECK-LIBCALL-NEXT: movzwl (%rdi), %ebx 478; CHECK-LIBCALL-NEXT: movzwl 2(%rdi), %edi 479; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 480; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 481; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 482; CHECK-LIBCALL-NEXT: movl %ebx, %edi 483; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 484; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 485; CHECK-LIBCALL-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 486; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0] 487; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 488; CHECK-LIBCALL-NEXT: movl %ebp, %edi 489; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 490; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 491; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 492; CHECK-LIBCALL-NEXT: movl %r14d, %edi 493; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee 494; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1 495; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 496; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0] 497; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 498; CHECK-LIBCALL-NEXT: addq $32, %rsp 499; CHECK-LIBCALL-NEXT: popq %rbx 500; CHECK-LIBCALL-NEXT: popq %r14 501; CHECK-LIBCALL-NEXT: popq %rbp 502; CHECK-LIBCALL-NEXT: retq 503; 504; BWON-F16C-LABEL: test_extend64_vec4: 505; BWON-F16C: # %bb.0: 506; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0 507; BWON-F16C-NEXT: vcvtps2pd %xmm0, %ymm0 508; BWON-F16C-NEXT: retq 509; 510; CHECK-I686-LABEL: test_extend64_vec4: 511; CHECK-I686: # %bb.0: 512; CHECK-I686-NEXT: pushl %ebx 513; CHECK-I686-NEXT: pushl %edi 514; CHECK-I686-NEXT: pushl %esi 515; CHECK-I686-NEXT: subl $64, %esp 516; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 517; CHECK-I686-NEXT: 
movzwl 6(%eax), %esi 518; CHECK-I686-NEXT: movzwl (%eax), %edi 519; CHECK-I686-NEXT: movzwl 2(%eax), %ebx 520; CHECK-I686-NEXT: movzwl 4(%eax), %eax 521; CHECK-I686-NEXT: movl %eax, (%esp) 522; CHECK-I686-NEXT: calll __gnu_h2f_ieee 523; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 524; CHECK-I686-NEXT: movl %ebx, (%esp) 525; CHECK-I686-NEXT: calll __gnu_h2f_ieee 526; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 527; CHECK-I686-NEXT: movl %edi, (%esp) 528; CHECK-I686-NEXT: calll __gnu_h2f_ieee 529; CHECK-I686-NEXT: movl %esi, (%esp) 530; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 531; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 532; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 533; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 534; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 535; CHECK-I686-NEXT: calll __gnu_h2f_ieee 536; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 537; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 538; CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] 539; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 540; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] 541; CHECK-I686-NEXT: addl $64, %esp 542; CHECK-I686-NEXT: popl %esi 543; CHECK-I686-NEXT: popl %edi 544; CHECK-I686-NEXT: popl %ebx 545; CHECK-I686-NEXT: retl 546 %a = load <4 x half>, <4 x half>* %p, align 8 547 %b = fpext <4 x half> %a to <4 x double> 548 ret <4 x double> %b 549} 550 551define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { 552; BWON-NOF16C-LABEL: test_trunc32_vec4: 553; BWON-NOF16C: # %bb.0: 554; BWON-NOF16C-NEXT: pushq %rbp 555; BWON-NOF16C-NEXT: pushq %r15 556; BWON-NOF16C-NEXT: pushq %r14 557; BWON-NOF16C-NEXT: pushq %rbx 558; BWON-NOF16C-NEXT: subq $24, %rsp 559; BWON-NOF16C-NEXT: movq %rdi, %rbx 560; BWON-NOF16C-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 561; BWON-NOF16C-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 562; BWON-NOF16C-NEXT: callq 
__gnu_f2h_ieee 563; BWON-NOF16C-NEXT: movl %eax, %r14d 564; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 565; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 566; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee 567; BWON-NOF16C-NEXT: movl %eax, %r15d 568; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 569; BWON-NOF16C-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 570; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee 571; BWON-NOF16C-NEXT: movl %eax, %ebp 572; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 573; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee 574; BWON-NOF16C-NEXT: movw %ax, (%rbx) 575; BWON-NOF16C-NEXT: movw %bp, 6(%rbx) 576; BWON-NOF16C-NEXT: movw %r15w, 4(%rbx) 577; BWON-NOF16C-NEXT: movw %r14w, 2(%rbx) 578; BWON-NOF16C-NEXT: addq $24, %rsp 579; BWON-NOF16C-NEXT: popq %rbx 580; BWON-NOF16C-NEXT: popq %r14 581; BWON-NOF16C-NEXT: popq %r15 582; BWON-NOF16C-NEXT: popq %rbp 583; BWON-NOF16C-NEXT: retq 584; 585; BWOFF-LABEL: test_trunc32_vec4: 586; BWOFF: # %bb.0: 587; BWOFF-NEXT: pushq %rbp 588; BWOFF-NEXT: pushq %r15 589; BWOFF-NEXT: pushq %r14 590; BWOFF-NEXT: pushq %rbx 591; BWOFF-NEXT: subq $24, %rsp 592; BWOFF-NEXT: movq %rdi, %rbx 593; BWOFF-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 594; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 595; BWOFF-NEXT: callq __gnu_f2h_ieee 596; BWOFF-NEXT: movw %ax, %r14w 597; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 598; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 599; BWOFF-NEXT: callq __gnu_f2h_ieee 600; BWOFF-NEXT: movw %ax, %r15w 601; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 602; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 603; BWOFF-NEXT: callq __gnu_f2h_ieee 604; BWOFF-NEXT: movw %ax, %bp 605; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 606; BWOFF-NEXT: callq __gnu_f2h_ieee 607; BWOFF-NEXT: movw %ax, (%rbx) 608; BWOFF-NEXT: movw %bp, 6(%rbx) 609; BWOFF-NEXT: movw %r15w, 4(%rbx) 610; BWOFF-NEXT: movw %r14w, 2(%rbx) 611; BWOFF-NEXT: addq $24, %rsp 612; BWOFF-NEXT: popq %rbx 
613; BWOFF-NEXT: popq %r14 614; BWOFF-NEXT: popq %r15 615; BWOFF-NEXT: popq %rbp 616; BWOFF-NEXT: retq 617; 618; BWON-F16C-LABEL: test_trunc32_vec4: 619; BWON-F16C: # %bb.0: 620; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, (%rdi) 621; BWON-F16C-NEXT: retq 622; 623; CHECK-I686-LABEL: test_trunc32_vec4: 624; CHECK-I686: # %bb.0: 625; CHECK-I686-NEXT: pushl %ebp 626; CHECK-I686-NEXT: pushl %ebx 627; CHECK-I686-NEXT: pushl %edi 628; CHECK-I686-NEXT: pushl %esi 629; CHECK-I686-NEXT: subl $44, %esp 630; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 631; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp 632; CHECK-I686-NEXT: movaps %xmm0, %xmm1 633; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] 634; CHECK-I686-NEXT: movss %xmm1, (%esp) 635; CHECK-I686-NEXT: calll __gnu_f2h_ieee 636; CHECK-I686-NEXT: movw %ax, %si 637; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 638; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 639; CHECK-I686-NEXT: movss %xmm0, (%esp) 640; CHECK-I686-NEXT: calll __gnu_f2h_ieee 641; CHECK-I686-NEXT: movw %ax, %di 642; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 643; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 644; CHECK-I686-NEXT: movss %xmm0, (%esp) 645; CHECK-I686-NEXT: calll __gnu_f2h_ieee 646; CHECK-I686-NEXT: movw %ax, %bx 647; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 648; CHECK-I686-NEXT: movss %xmm0, (%esp) 649; CHECK-I686-NEXT: calll __gnu_f2h_ieee 650; CHECK-I686-NEXT: movw %ax, (%ebp) 651; CHECK-I686-NEXT: movw %bx, 6(%ebp) 652; CHECK-I686-NEXT: movw %di, 4(%ebp) 653; CHECK-I686-NEXT: movw %si, 2(%ebp) 654; CHECK-I686-NEXT: addl $44, %esp 655; CHECK-I686-NEXT: popl %esi 656; CHECK-I686-NEXT: popl %edi 657; CHECK-I686-NEXT: popl %ebx 658; CHECK-I686-NEXT: popl %ebp 659; CHECK-I686-NEXT: retl 660 %v = fptrunc <4 x float> %a to <4 x half> 661 store <4 x half> %v, <4 x half>* %p 662 ret void 663} 664 665define void 
@test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 { 666; BWON-NOF16C-LABEL: test_trunc64_vec4: 667; BWON-NOF16C: # %bb.0: 668; BWON-NOF16C-NEXT: pushq %rbp 669; BWON-NOF16C-NEXT: pushq %r15 670; BWON-NOF16C-NEXT: pushq %r14 671; BWON-NOF16C-NEXT: pushq %rbx 672; BWON-NOF16C-NEXT: subq $40, %rsp 673; BWON-NOF16C-NEXT: movq %rdi, %rbx 674; BWON-NOF16C-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill 675; BWON-NOF16C-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 676; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 677; BWON-NOF16C-NEXT: callq __truncdfhf2 678; BWON-NOF16C-NEXT: movl %eax, %r14d 679; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 680; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 681; BWON-NOF16C-NEXT: callq __truncdfhf2 682; BWON-NOF16C-NEXT: movl %eax, %r15d 683; BWON-NOF16C-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 684; BWON-NOF16C-NEXT: callq __truncdfhf2 685; BWON-NOF16C-NEXT: movl %eax, %ebp 686; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 687; BWON-NOF16C-NEXT: callq __truncdfhf2 688; BWON-NOF16C-NEXT: movw %ax, 4(%rbx) 689; BWON-NOF16C-NEXT: movw %bp, (%rbx) 690; BWON-NOF16C-NEXT: movw %r15w, 6(%rbx) 691; BWON-NOF16C-NEXT: movw %r14w, 2(%rbx) 692; BWON-NOF16C-NEXT: addq $40, %rsp 693; BWON-NOF16C-NEXT: popq %rbx 694; BWON-NOF16C-NEXT: popq %r14 695; BWON-NOF16C-NEXT: popq %r15 696; BWON-NOF16C-NEXT: popq %rbp 697; BWON-NOF16C-NEXT: retq 698; 699; BWOFF-LABEL: test_trunc64_vec4: 700; BWOFF: # %bb.0: 701; BWOFF-NEXT: pushq %rbp 702; BWOFF-NEXT: pushq %r15 703; BWOFF-NEXT: pushq %r14 704; BWOFF-NEXT: pushq %rbx 705; BWOFF-NEXT: subq $40, %rsp 706; BWOFF-NEXT: movq %rdi, %rbx 707; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill 708; BWOFF-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 709; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 710; BWOFF-NEXT: callq __truncdfhf2 711; BWOFF-NEXT: movw %ax, %r14w 712; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 713; 
BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 714; BWOFF-NEXT: callq __truncdfhf2 715; BWOFF-NEXT: movw %ax, %r15w 716; BWOFF-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 717; BWOFF-NEXT: callq __truncdfhf2 718; BWOFF-NEXT: movw %ax, %bp 719; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 720; BWOFF-NEXT: callq __truncdfhf2 721; BWOFF-NEXT: movw %ax, 4(%rbx) 722; BWOFF-NEXT: movw %bp, (%rbx) 723; BWOFF-NEXT: movw %r15w, 6(%rbx) 724; BWOFF-NEXT: movw %r14w, 2(%rbx) 725; BWOFF-NEXT: addq $40, %rsp 726; BWOFF-NEXT: popq %rbx 727; BWOFF-NEXT: popq %r14 728; BWOFF-NEXT: popq %r15 729; BWOFF-NEXT: popq %rbp 730; BWOFF-NEXT: retq 731; 732; BWON-F16C-LABEL: test_trunc64_vec4: 733; BWON-F16C: # %bb.0: 734; BWON-F16C-NEXT: pushq %rbp 735; BWON-F16C-NEXT: pushq %r15 736; BWON-F16C-NEXT: pushq %r14 737; BWON-F16C-NEXT: pushq %rbx 738; BWON-F16C-NEXT: subq $88, %rsp 739; BWON-F16C-NEXT: movq %rdi, %rbx 740; BWON-F16C-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 741; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 742; BWON-F16C-NEXT: vzeroupper 743; BWON-F16C-NEXT: callq __truncdfhf2 744; BWON-F16C-NEXT: movl %eax, %r14d 745; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 746; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0 747; BWON-F16C-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 748; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 749; BWON-F16C-NEXT: vzeroupper 750; BWON-F16C-NEXT: callq __truncdfhf2 751; BWON-F16C-NEXT: movl %eax, %r15d 752; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 753; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 754; BWON-F16C-NEXT: vzeroupper 755; BWON-F16C-NEXT: callq __truncdfhf2 756; BWON-F16C-NEXT: movl %eax, %ebp 757; BWON-F16C-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 758; BWON-F16C-NEXT: callq __truncdfhf2 759; BWON-F16C-NEXT: movw %ax, 4(%rbx) 760; BWON-F16C-NEXT: movw %bp, (%rbx) 761; 
BWON-F16C-NEXT: movw %r15w, 6(%rbx) 762; BWON-F16C-NEXT: movw %r14w, 2(%rbx) 763; BWON-F16C-NEXT: addq $88, %rsp 764; BWON-F16C-NEXT: popq %rbx 765; BWON-F16C-NEXT: popq %r14 766; BWON-F16C-NEXT: popq %r15 767; BWON-F16C-NEXT: popq %rbp 768; BWON-F16C-NEXT: retq 769; 770; CHECK-I686-LABEL: test_trunc64_vec4: 771; CHECK-I686: # %bb.0: 772; CHECK-I686-NEXT: pushl %ebp 773; CHECK-I686-NEXT: pushl %ebx 774; CHECK-I686-NEXT: pushl %edi 775; CHECK-I686-NEXT: pushl %esi 776; CHECK-I686-NEXT: subl $60, %esp 777; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 778; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 779; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp 780; CHECK-I686-NEXT: movlps %xmm0, (%esp) 781; CHECK-I686-NEXT: calll __truncdfhf2 782; CHECK-I686-NEXT: movw %ax, %si 783; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 784; CHECK-I686-NEXT: movhps %xmm0, (%esp) 785; CHECK-I686-NEXT: calll __truncdfhf2 786; CHECK-I686-NEXT: movw %ax, %di 787; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 788; CHECK-I686-NEXT: movlps %xmm0, (%esp) 789; CHECK-I686-NEXT: calll __truncdfhf2 790; CHECK-I686-NEXT: movw %ax, %bx 791; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 792; CHECK-I686-NEXT: movhps %xmm0, (%esp) 793; CHECK-I686-NEXT: calll __truncdfhf2 794; CHECK-I686-NEXT: movw %ax, 6(%ebp) 795; CHECK-I686-NEXT: movw %bx, 4(%ebp) 796; CHECK-I686-NEXT: movw %di, 2(%ebp) 797; CHECK-I686-NEXT: movw %si, (%ebp) 798; CHECK-I686-NEXT: addl $60, %esp 799; CHECK-I686-NEXT: popl %esi 800; CHECK-I686-NEXT: popl %edi 801; CHECK-I686-NEXT: popl %ebx 802; CHECK-I686-NEXT: popl %ebp 803; CHECK-I686-NEXT: retl 804 %v = fptrunc <4 x double> %a to <4 x half> 805 store <4 x half> %v, <4 x half>* %p 806 ret void 807} 808 809declare float @test_floatret(); 810 811; On i686, if SSE2 is available, the return value from test_floatret is loaded 812; to f80 and then rounded to 
; f32. The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
;
; The function calls test_floatret and truncates the float result to half.
; Expected output: the x86-64 soft-float configurations call __gnu_f2h_ieee on
; the returned value, the F16C configuration uses vcvtps2ph, and the i686
; configuration stores the returned value with fstps before calling
; __gnu_f2h_ieee.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: callq test_floatret
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: pushq %rax
; BWON-F16C-NEXT: callq test_floatret
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: # kill: def $ax killed $ax killed $eax
; BWON-F16C-NEXT: popq %rcx
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: calll test_floatret
; CHECK-I686-NEXT: fstps (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
  %ret = call float @test_floatret()
  %narrowed = fptrunc float %ret to half
  ret half %narrowed
}

; Loads a half from %b, converts the i32 %a to half, adds the two as half and
; extends the sum to float. The expected code rounds the converted integer
; through half precision (float-to-half followed by half-to-float) before the
; add, and rounds the sum through half precision again before returning it as
; a float.
define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: subq $16, %rsp
; CHECK-LIBCALL-NEXT: movzwl (%rsi), %ebx
; CHECK-LIBCALL-NEXT: cvtsi2ss %edi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movl %ebx, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
; CHECK-LIBCALL-NEXT: addq $16, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
;
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl (%rsi), %eax
; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_sitofp_fadd_i32:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $20, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl (%eax), %edi
; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %si
; CHECK-I686-NEXT: movl %edi, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: movzwl %si, %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movzwl %ax, %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: addl $20, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: retl
  %loaded = load half, half* %b
  %converted = sitofp i32 %a to half
  %sum = fadd half %loaded, %converted
  %widened = fpext half %sum to float
  ret float %widened
}

; Regression test for PR40273. Compares the half argument against 0xH0000
; (+0.0) with an unordered-or-not-equal predicate and converts the i1 result
; to half. The expected code extends the argument to float, compares it with
; zero, and selects between 0 and 0x3C00 (the half bit pattern for 1.0) using
; conditional moves on the "not equal" and "parity" conditions.
define half @PR40273(half) #0 {
; CHECK-LIBCALL-LABEL: PR40273:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl %di, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: xorl %eax, %eax
; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: movl $15360, %ecx # imm = 0x3C00
; CHECK-LIBCALL-NEXT: cmovnel %ecx, %eax
; CHECK-LIBCALL-NEXT: cmovpl %ecx, %eax
; CHECK-LIBCALL-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: PR40273:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl %di, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: xorl %eax, %eax
; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT: movl $15360, %ecx # imm = 0x3C00
; BWON-F16C-NEXT: cmovnel %ecx, %eax
; BWON-F16C-NEXT: cmovpl %ecx, %eax
; BWON-F16C-NEXT: # kill: def $ax killed $ax killed $eax
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: PR40273:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: xorl %eax, %eax
; CHECK-I686-NEXT: xorps %xmm1, %xmm1
; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
; CHECK-I686-NEXT: movl $15360, %ecx # imm = 0x3C00
; CHECK-I686-NEXT: cmovnel %ecx, %eax
; CHECK-I686-NEXT: cmovpl %ecx, %eax
; CHECK-I686-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
  %is_nonzero = fcmp une half %0, 0xH0000
  %as_half = uitofp i1 %is_nonzero to half
  ret half %as_half
}

; Attribute group shared by every test function in this file.
attributes #0 = { nounwind }