1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 \
5; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
9; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
10
; Copies one half value from %in to %out with a plain load and store.
; The expected code is a 16-bit integer move with no conversion call.  The
; x86_64 runs with -fixup-byte-word-insts=1 expect the load as movzwl into
; the 32-bit register; the runs with -fixup-byte-word-insts=0 expect movw.
11define void @test_load_store(half* %in, half* %out) #0 {
12; BWON-LABEL: test_load_store:
13; BWON:       # %bb.0:
14; BWON-NEXT:    movzwl (%rdi), %eax
15; BWON-NEXT:    movw %ax, (%rsi)
16; BWON-NEXT:    retq
17;
18; BWOFF-LABEL: test_load_store:
19; BWOFF:       # %bb.0:
20; BWOFF-NEXT:    movw (%rdi), %ax
21; BWOFF-NEXT:    movw %ax, (%rsi)
22; BWOFF-NEXT:    retq
23;
24; CHECK-I686-LABEL: test_load_store:
25; CHECK-I686:       # %bb.0:
26; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
27; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
28; CHECK-I686-NEXT:    movw (%ecx), %cx
29; CHECK-I686-NEXT:    movw %cx, (%eax)
30; CHECK-I686-NEXT:    retl
31  %val = load half, half* %in
32  store half %val, half* %out
33  ret void
34}
35
; Loads a half from %addr and returns its bit pattern as an i16 via bitcast.
; Every configuration expects a single 16-bit load into the return register
; (movzwl with byte/word fixup enabled, movw otherwise) and nothing else.
36define i16 @test_bitcast_from_half(half* %addr) #0 {
37; BWON-LABEL: test_bitcast_from_half:
38; BWON:       # %bb.0:
39; BWON-NEXT:    movzwl (%rdi), %eax
40; BWON-NEXT:    retq
41;
42; BWOFF-LABEL: test_bitcast_from_half:
43; BWOFF:       # %bb.0:
44; BWOFF-NEXT:    movw (%rdi), %ax
45; BWOFF-NEXT:    retq
46;
47; CHECK-I686-LABEL: test_bitcast_from_half:
48; CHECK-I686:       # %bb.0:
49; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
50; CHECK-I686-NEXT:    movw (%eax), %ax
51; CHECK-I686-NEXT:    retl
52  %val = load half, half* %addr
53  %val_int = bitcast half %val to i16
54  ret i16 %val_int
55}
56
; Reinterprets the i16 argument %in as a half (bitcast) and stores it to
; %addr.  All three x86_64 runs share one expectation: a single movw store
; of the incoming argument register.  The i686 run loads both arguments from
; the stack first and then performs the same 16-bit store.
57define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
58; CHECK-LABEL: test_bitcast_to_half:
59; CHECK:       # %bb.0:
60; CHECK-NEXT:    movw %si, (%rdi)
61; CHECK-NEXT:    retq
62;
63; CHECK-I686-LABEL: test_bitcast_to_half:
64; CHECK-I686:       # %bb.0:
65; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
66; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
67; CHECK-I686-NEXT:    movw %ax, (%ecx)
68; CHECK-I686-NEXT:    retl
69  %val_fp = bitcast i16 %in to half
70  store half %val_fp, half* %addr
71  ret void
72}
73
; Loads a half from %addr and extends it to float (fpext).
; Expected lowering per configuration:
;   - x86_64 without F16C: zero-extending load, then a tail call (jmp) to
;     __gnu_h2f_ieee.
;   - x86_64 with F16C: load, vmovd into xmm0, then vcvtph2ps.
;   - i686: the loaded value is passed on the stack to __gnu_h2f_ieee.
74define float @test_extend32(half* %addr) #0 {
75; CHECK-LIBCALL-LABEL: test_extend32:
76; CHECK-LIBCALL:       # %bb.0:
77; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
78; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
79;
80; BWON-F16C-LABEL: test_extend32:
81; BWON-F16C:       # %bb.0:
82; BWON-F16C-NEXT:    movzwl (%rdi), %eax
83; BWON-F16C-NEXT:    vmovd %eax, %xmm0
84; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
85; BWON-F16C-NEXT:    retq
86;
87; CHECK-I686-LABEL: test_extend32:
88; CHECK-I686:       # %bb.0:
89; CHECK-I686-NEXT:    subl $12, %esp
90; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
91; CHECK-I686-NEXT:    movzwl (%eax), %eax
92; CHECK-I686-NEXT:    movl %eax, (%esp)
93; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
94; CHECK-I686-NEXT:    addl $12, %esp
95; CHECK-I686-NEXT:    retl
96  %val16 = load half, half* %addr
97  %val32 = fpext half %val16 to float
98  ret float %val32
99}
100
; Loads a half from %addr and extends it directly to double (fpext).
; Expected lowering goes through float in every x86_64 configuration:
;   - without F16C: call __gnu_h2f_ieee, then cvtss2sd.
;   - with F16C: vcvtph2ps, then vcvtss2sd.
; The i686 expectation shows only the __gnu_h2f_ieee call, with no separate
; widening instruction before the return.
; NOTE(review): %val32 below holds a double; the name looks carried over
; from test_extend32.
101define double @test_extend64(half* %addr) #0 {
102; CHECK-LIBCALL-LABEL: test_extend64:
103; CHECK-LIBCALL:       # %bb.0:
104; CHECK-LIBCALL-NEXT:    pushq %rax
105; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
106; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
107; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
108; CHECK-LIBCALL-NEXT:    popq %rax
109; CHECK-LIBCALL-NEXT:    retq
110;
111; BWON-F16C-LABEL: test_extend64:
112; BWON-F16C:       # %bb.0:
113; BWON-F16C-NEXT:    movzwl (%rdi), %eax
114; BWON-F16C-NEXT:    vmovd %eax, %xmm0
115; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
116; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
117; BWON-F16C-NEXT:    retq
118;
119; CHECK-I686-LABEL: test_extend64:
120; CHECK-I686:       # %bb.0:
121; CHECK-I686-NEXT:    subl $12, %esp
122; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
123; CHECK-I686-NEXT:    movzwl (%eax), %eax
124; CHECK-I686-NEXT:    movl %eax, (%esp)
125; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
126; CHECK-I686-NEXT:    addl $12, %esp
127; CHECK-I686-NEXT:    retl
128  %val16 = load half, half* %addr
129  %val32 = fpext half %val16 to double
130  ret double %val32
131}
132
; Truncates the float argument %in to half (fptrunc) and stores it to %addr.
; Expected lowering per configuration:
;   - x86_64 without F16C: the pointer is kept in %rbx across a call to
;     __gnu_f2h_ieee, and the 16-bit result is stored with movw.
;   - x86_64 with F16C: vcvtps2ph $4 followed by vpextrw straight to memory.
;   - i686: the float is passed on the stack to __gnu_f2h_ieee, with the
;     pointer kept in %esi across the call.
133define void @test_trunc32(float %in, half* %addr) #0 {
134; CHECK-LIBCALL-LABEL: test_trunc32:
135; CHECK-LIBCALL:       # %bb.0:
136; CHECK-LIBCALL-NEXT:    pushq %rbx
137; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
138; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
139; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
140; CHECK-LIBCALL-NEXT:    popq %rbx
141; CHECK-LIBCALL-NEXT:    retq
142;
143; BWON-F16C-LABEL: test_trunc32:
144; BWON-F16C:       # %bb.0:
145; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
146; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rdi)
147; BWON-F16C-NEXT:    retq
148;
149; CHECK-I686-LABEL: test_trunc32:
150; CHECK-I686:       # %bb.0:
151; CHECK-I686-NEXT:    pushl %esi
152; CHECK-I686-NEXT:    subl $8, %esp
153; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
154; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
155; CHECK-I686-NEXT:    movss %xmm0, (%esp)
156; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
157; CHECK-I686-NEXT:    movw %ax, (%esi)
158; CHECK-I686-NEXT:    addl $8, %esp
159; CHECK-I686-NEXT:    popl %esi
160; CHECK-I686-NEXT:    retl
161  %val16 = fptrunc float %in to half
162  store half %val16, half* %addr
163  ret void
164}
165
; Truncates the double argument %in to half (fptrunc) and stores it to %addr.
; All three x86_64 runs, including the one with F16C enabled, share a single
; expectation: a call to __truncdfhf2 followed by a movw store of the
; result.  The i686 run passes the double on the stack to the same routine.
166define void @test_trunc64(double %in, half* %addr) #0 {
167; CHECK-LABEL: test_trunc64:
168; CHECK:       # %bb.0:
169; CHECK-NEXT:    pushq %rbx
170; CHECK-NEXT:    movq %rdi, %rbx
171; CHECK-NEXT:    callq __truncdfhf2
172; CHECK-NEXT:    movw %ax, (%rbx)
173; CHECK-NEXT:    popq %rbx
174; CHECK-NEXT:    retq
175;
176; CHECK-I686-LABEL: test_trunc64:
177; CHECK-I686:       # %bb.0:
178; CHECK-I686-NEXT:    pushl %esi
179; CHECK-I686-NEXT:    subl $8, %esp
180; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
181; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
182; CHECK-I686-NEXT:    movsd %xmm0, (%esp)
183; CHECK-I686-NEXT:    calll __truncdfhf2
184; CHECK-I686-NEXT:    movw %ax, (%esi)
185; CHECK-I686-NEXT:    addl $8, %esp
186; CHECK-I686-NEXT:    popl %esi
187; CHECK-I686-NEXT:    retl
188  %val16 = fptrunc double %in to half
189  store half %val16, half* %addr
190  ret void
191}
192
; Loads a half from %p (align 2) and converts it to a signed i64 (fptosi).
; Expected lowering first widens the half to float, then converts:
;   - x86_64 without F16C: call __gnu_h2f_ieee, then cvttss2si.
;   - x86_64 with F16C: vcvtph2ps, then vcvttss2si.
;   - i686: call __gnu_h2f_ieee, store the result with fstps as the stack
;     argument, then call __fixsfdi.
193define i64 @test_fptosi_i64(half* %p) #0 {
194; CHECK-LIBCALL-LABEL: test_fptosi_i64:
195; CHECK-LIBCALL:       # %bb.0:
196; CHECK-LIBCALL-NEXT:    pushq %rax
197; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
198; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
199; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
200; CHECK-LIBCALL-NEXT:    popq %rcx
201; CHECK-LIBCALL-NEXT:    retq
202;
203; BWON-F16C-LABEL: test_fptosi_i64:
204; BWON-F16C:       # %bb.0:
205; BWON-F16C-NEXT:    movzwl (%rdi), %eax
206; BWON-F16C-NEXT:    vmovd %eax, %xmm0
207; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
208; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
209; BWON-F16C-NEXT:    retq
210;
211; CHECK-I686-LABEL: test_fptosi_i64:
212; CHECK-I686:       # %bb.0:
213; CHECK-I686-NEXT:    subl $12, %esp
214; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
215; CHECK-I686-NEXT:    movzwl (%eax), %eax
216; CHECK-I686-NEXT:    movl %eax, (%esp)
217; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
218; CHECK-I686-NEXT:    fstps (%esp)
219; CHECK-I686-NEXT:    calll __fixsfdi
220; CHECK-I686-NEXT:    addl $12, %esp
221; CHECK-I686-NEXT:    retl
222  %a = load half, half* %p, align 2
223  %r = fptosi half %a to i64
224  ret i64 %r
225}
226
; Converts the signed i64 argument %a to half (sitofp) and stores it to %p.
; Expected lowering converts to float first, then narrows to half:
;   - x86_64 without F16C: cvtsi2ss, then call __gnu_f2h_ieee.
;   - x86_64 with F16C: vcvtsi2ss, vcvtps2ph $4, vpextrw to memory.
;   - i686: the 64-bit integer is converted through the x87 unit
;     (fildll / fstps), reloaded with movss, and passed to __gnu_f2h_ieee.
227define void @test_sitofp_i64(i64 %a, half* %p) #0 {
228; CHECK-LIBCALL-LABEL: test_sitofp_i64:
229; CHECK-LIBCALL:       # %bb.0:
230; CHECK-LIBCALL-NEXT:    pushq %rbx
231; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
232; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
233; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
234; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
235; CHECK-LIBCALL-NEXT:    popq %rbx
236; CHECK-LIBCALL-NEXT:    retq
237;
238; BWON-F16C-LABEL: test_sitofp_i64:
239; BWON-F16C:       # %bb.0:
240; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
241; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
242; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
243; BWON-F16C-NEXT:    retq
244;
245; CHECK-I686-LABEL: test_sitofp_i64:
246; CHECK-I686:       # %bb.0:
247; CHECK-I686-NEXT:    pushl %esi
248; CHECK-I686-NEXT:    subl $24, %esp
249; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
250; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
251; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
252; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
253; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
254; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
255; CHECK-I686-NEXT:    movss %xmm0, (%esp)
256; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
257; CHECK-I686-NEXT:    movw %ax, (%esi)
258; CHECK-I686-NEXT:    addl $24, %esp
259; CHECK-I686-NEXT:    popl %esi
260; CHECK-I686-NEXT:    retl
261  %r = sitofp i64 %a to half
262  store half %r, half* %p
263  ret void
264}
265
; Loads a half from %p (align 2) and converts it to an unsigned i64 (fptoui).
; On x86_64 the half is widened to float (via __gnu_h2f_ieee without F16C,
; via vcvtph2ps with F16C) and the unsigned conversion is then expanded
; inline: a float constant is loaded from memory (presumably 2^63 -- its
; value is not visible here), subtracted from the input, and both the
; original and the difference go through the signed cvttss2si.  The
; difference's result is XORed with 0x8000000000000000, and cmovae after a
; ucomiss against the constant selects between the two results.
; On i686 the expected code calls __gnu_h2f_ieee and then __fixunssfdi.
266define i64 @test_fptoui_i64(half* %p) #0 {
267; CHECK-LIBCALL-LABEL: test_fptoui_i64:
268; CHECK-LIBCALL:       # %bb.0:
269; CHECK-LIBCALL-NEXT:    pushq %rax
270; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
271; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
272; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
273; CHECK-LIBCALL-NEXT:    movaps %xmm0, %xmm2
274; CHECK-LIBCALL-NEXT:    subss %xmm1, %xmm2
275; CHECK-LIBCALL-NEXT:    cvttss2si %xmm2, %rax
276; CHECK-LIBCALL-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
277; CHECK-LIBCALL-NEXT:    xorq %rax, %rcx
278; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
279; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
280; CHECK-LIBCALL-NEXT:    cmovaeq %rcx, %rax
281; CHECK-LIBCALL-NEXT:    popq %rcx
282; CHECK-LIBCALL-NEXT:    retq
283;
284; BWON-F16C-LABEL: test_fptoui_i64:
285; BWON-F16C:       # %bb.0:
286; BWON-F16C-NEXT:    movzwl (%rdi), %eax
287; BWON-F16C-NEXT:    vmovd %eax, %xmm0
288; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
289; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
290; BWON-F16C-NEXT:    vsubss %xmm1, %xmm0, %xmm2
291; BWON-F16C-NEXT:    vcvttss2si %xmm2, %rax
292; BWON-F16C-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
293; BWON-F16C-NEXT:    xorq %rax, %rcx
294; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
295; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
296; BWON-F16C-NEXT:    cmovaeq %rcx, %rax
297; BWON-F16C-NEXT:    retq
298;
299; CHECK-I686-LABEL: test_fptoui_i64:
300; CHECK-I686:       # %bb.0:
301; CHECK-I686-NEXT:    subl $12, %esp
302; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
303; CHECK-I686-NEXT:    movzwl (%eax), %eax
304; CHECK-I686-NEXT:    movl %eax, (%esp)
305; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
306; CHECK-I686-NEXT:    fstps (%esp)
307; CHECK-I686-NEXT:    calll __fixunssfdi
308; CHECK-I686-NEXT:    addl $12, %esp
309; CHECK-I686-NEXT:    retl
310  %a = load half, half* %p, align 2
311  %r = fptoui half %a to i64
312  ret i64 %r
313}
314
; Converts the unsigned i64 argument %a to half (uitofp) and stores it to %p.
; On x86_64 the expected code branches on the sign bit of the input:
;   - sign bit clear: a plain signed cvtsi2ss is used.
;   - sign bit set: the value is shifted right by one with its low bit ORed
;     back in, converted with cvtsi2ss, and doubled with addss.
; The float is then narrowed to half by __gnu_f2h_ieee (without F16C) or by
; vcvtps2ph $4 + vpextrw (with F16C).
; On i686 the value is loaded with fildll and corrected by an fadds from a
; constant-pool table indexed by a register shifted right by 31, then passed
; to __gnu_f2h_ieee.
315define void @test_uitofp_i64(i64 %a, half* %p) #0 {
316; CHECK-LIBCALL-LABEL: test_uitofp_i64:
317; CHECK-LIBCALL:       # %bb.0:
318; CHECK-LIBCALL-NEXT:    pushq %rbx
319; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
320; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
321; CHECK-LIBCALL-NEXT:    js .LBB10_1
322; CHECK-LIBCALL-NEXT:  # %bb.2:
323; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
324; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
325; CHECK-LIBCALL-NEXT:  .LBB10_1:
326; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
327; CHECK-LIBCALL-NEXT:    shrq %rax
328; CHECK-LIBCALL-NEXT:    andl $1, %edi
329; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
330; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
331; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
332; CHECK-LIBCALL-NEXT:  .LBB10_3:
333; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
334; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
335; CHECK-LIBCALL-NEXT:    popq %rbx
336; CHECK-LIBCALL-NEXT:    retq
337;
338; BWON-F16C-LABEL: test_uitofp_i64:
339; BWON-F16C:       # %bb.0:
340; BWON-F16C-NEXT:    testq %rdi, %rdi
341; BWON-F16C-NEXT:    js .LBB10_1
342; BWON-F16C-NEXT:  # %bb.2:
343; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
344; BWON-F16C-NEXT:    jmp .LBB10_3
345; BWON-F16C-NEXT:  .LBB10_1:
346; BWON-F16C-NEXT:    movq %rdi, %rax
347; BWON-F16C-NEXT:    shrq %rax
348; BWON-F16C-NEXT:    andl $1, %edi
349; BWON-F16C-NEXT:    orq %rax, %rdi
350; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
351; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
352; BWON-F16C-NEXT:  .LBB10_3:
353; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
354; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
355; BWON-F16C-NEXT:    retq
356;
357; CHECK-I686-LABEL: test_uitofp_i64:
358; CHECK-I686:       # %bb.0:
359; CHECK-I686-NEXT:    pushl %esi
360; CHECK-I686-NEXT:    subl $24, %esp
361; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
362; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
363; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
364; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
365; CHECK-I686-NEXT:    shrl $31, %eax
366; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
367; CHECK-I686-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
368; CHECK-I686-NEXT:    fstps (%esp)
369; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
370; CHECK-I686-NEXT:    movw %ax, (%esi)
371; CHECK-I686-NEXT:    addl $24, %esp
372; CHECK-I686-NEXT:    popl %esi
373; CHECK-I686-NEXT:    retl
374  %r = uitofp i64 %a to half
375  store half %r, half* %p
376  ret void
377}
378
; Loads a <4 x half> vector from %p (align 8) and extends every element to
; float (fpext), returning <4 x float>.
; Expected lowering per configuration:
;   - x86_64 without F16C: the vector is scalarized.  Each element is pulled
;     out with pextrw and passed to __gnu_h2f_ieee (four calls), with
;     intermediate values spilled around the calls; the results are
;     recombined with punpckldq / punpcklqdq.
;   - x86_64 with F16C: a single vcvtph2ps with a memory operand.
;   - i686: four __gnu_h2f_ieee calls whose results are stored with
;     fstpt / fstps and reassembled with movss / unpcklps / movlhps.
379define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
380; CHECK-LIBCALL-LABEL: test_extend32_vec4:
381; CHECK-LIBCALL:       # %bb.0:
382; CHECK-LIBCALL-NEXT:    subq $88, %rsp
383; CHECK-LIBCALL-NEXT:    movl (%rdi), %eax
384; CHECK-LIBCALL-NEXT:    movl 4(%rdi), %ecx
385; CHECK-LIBCALL-NEXT:    movl %eax, (%rsp)
386; CHECK-LIBCALL-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
387; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0
388; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
389; CHECK-LIBCALL-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
390; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
391; CHECK-LIBCALL-NEXT:    pextrw $1, %xmm0, %edi
392; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
393; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
394; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
395; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %edi
396; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
397; CHECK-LIBCALL-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
398; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
399; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
400; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
401; CHECK-LIBCALL-NEXT:    pextrw $1, %xmm0, %edi
402; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
403; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
404; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
405; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %edi
406; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
407; CHECK-LIBCALL-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
408; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
409; CHECK-LIBCALL-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
410; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
411; CHECK-LIBCALL-NEXT:    addq $88, %rsp
412; CHECK-LIBCALL-NEXT:    retq
413;
414; BWON-F16C-LABEL: test_extend32_vec4:
415; BWON-F16C:       # %bb.0:
416; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
417; BWON-F16C-NEXT:    retq
418;
419; CHECK-I686-LABEL: test_extend32_vec4:
420; CHECK-I686:       # %bb.0:
421; CHECK-I686-NEXT:    subl $124, %esp
422; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
423; CHECK-I686-NEXT:    movl (%eax), %ecx
424; CHECK-I686-NEXT:    movl 4(%eax), %eax
425; CHECK-I686-NEXT:    movl %eax, {{[0-9]+}}(%esp)
426; CHECK-I686-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
427; CHECK-I686-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
428; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
429; CHECK-I686-NEXT:    movdqa {{[0-9]+}}(%esp), %xmm0
430; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
431; CHECK-I686-NEXT:    pextrw $1, %xmm0, %eax
432; CHECK-I686-NEXT:    movl %eax, (%esp)
433; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
434; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
435; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
436; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
437; CHECK-I686-NEXT:    movl %eax, (%esp)
438; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
439; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
440; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
441; CHECK-I686-NEXT:    pextrw $1, %xmm0, %eax
442; CHECK-I686-NEXT:    movl %eax, (%esp)
443; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
444; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
445; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
446; CHECK-I686-NEXT:    movl %eax, (%esp)
447; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
448; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
449; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
450; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
451; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
452; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
453; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
454; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
455; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
456; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
457; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
458; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
459; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
460; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
461; CHECK-I686-NEXT:    addl $124, %esp
462; CHECK-I686-NEXT:    retl
463  %a = load <4 x half>, <4 x half>* %p, align 8
464  %b = fpext <4 x half> %a to <4 x float>
465  ret <4 x float> %b
466}
467
; Loads a <4 x half> vector from %p (align 8) and extends every element to
; double (fpext), returning <4 x double>.
; Expected lowering per configuration:
;   - x86_64 without F16C: the four halves are loaded individually with
;     movzwl, each is widened by a __gnu_h2f_ieee call followed by cvtss2sd,
;     and pairs are merged with unpcklpd into xmm0 and xmm1.
;   - x86_64 with F16C: vcvtph2ps from memory, then vcvtps2pd into ymm0.
;   - i686: four __gnu_h2f_ieee calls; results are written out as doubles
;     with fstpl and gathered into xmm0 / xmm1 with movsd + movhps.
468define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
469; CHECK-LIBCALL-LABEL: test_extend64_vec4:
470; CHECK-LIBCALL:       # %bb.0:
471; CHECK-LIBCALL-NEXT:    pushq %rbp
472; CHECK-LIBCALL-NEXT:    pushq %r14
473; CHECK-LIBCALL-NEXT:    pushq %rbx
474; CHECK-LIBCALL-NEXT:    subq $32, %rsp
475; CHECK-LIBCALL-NEXT:    movzwl 4(%rdi), %r14d
476; CHECK-LIBCALL-NEXT:    movzwl 6(%rdi), %ebp
477; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %ebx
478; CHECK-LIBCALL-NEXT:    movzwl 2(%rdi), %edi
479; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
480; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
481; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
482; CHECK-LIBCALL-NEXT:    movl %ebx, %edi
483; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
484; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
485; CHECK-LIBCALL-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
486; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
487; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
488; CHECK-LIBCALL-NEXT:    movl %ebp, %edi
489; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
490; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
491; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
492; CHECK-LIBCALL-NEXT:    movl %r14d, %edi
493; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
494; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
495; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
496; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
497; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
498; CHECK-LIBCALL-NEXT:    addq $32, %rsp
499; CHECK-LIBCALL-NEXT:    popq %rbx
500; CHECK-LIBCALL-NEXT:    popq %r14
501; CHECK-LIBCALL-NEXT:    popq %rbp
502; CHECK-LIBCALL-NEXT:    retq
503;
504; BWON-F16C-LABEL: test_extend64_vec4:
505; BWON-F16C:       # %bb.0:
506; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
507; BWON-F16C-NEXT:    vcvtps2pd %xmm0, %ymm0
508; BWON-F16C-NEXT:    retq
509;
510; CHECK-I686-LABEL: test_extend64_vec4:
511; CHECK-I686:       # %bb.0:
512; CHECK-I686-NEXT:    pushl %ebx
513; CHECK-I686-NEXT:    pushl %edi
514; CHECK-I686-NEXT:    pushl %esi
515; CHECK-I686-NEXT:    subl $64, %esp
516; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
517; CHECK-I686-NEXT:    movzwl 6(%eax), %esi
518; CHECK-I686-NEXT:    movzwl (%eax), %edi
519; CHECK-I686-NEXT:    movzwl 2(%eax), %ebx
520; CHECK-I686-NEXT:    movzwl 4(%eax), %eax
521; CHECK-I686-NEXT:    movl %eax, (%esp)
522; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
523; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
524; CHECK-I686-NEXT:    movl %ebx, (%esp)
525; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
526; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
527; CHECK-I686-NEXT:    movl %edi, (%esp)
528; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
529; CHECK-I686-NEXT:    movl %esi, (%esp)
530; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
531; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
532; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
533; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
534; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
535; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
536; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
537; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
538; CHECK-I686-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
539; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
540; CHECK-I686-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
541; CHECK-I686-NEXT:    addl $64, %esp
542; CHECK-I686-NEXT:    popl %esi
543; CHECK-I686-NEXT:    popl %edi
544; CHECK-I686-NEXT:    popl %ebx
545; CHECK-I686-NEXT:    retl
546  %a = load <4 x half>, <4 x half>* %p, align 8
547  %b = fpext <4 x half> %a to <4 x double>
548  ret <4 x double> %b
549}
550
; Truncates the <4 x float> argument %a to <4 x half> (fptrunc) and stores
; the result to %p.
; Expected lowering per configuration:
;   - x86_64 without F16C: the vector is spilled and each lane is passed to
;     __gnu_f2h_ieee (four calls); the results are written back with four
;     movw stores.  The two non-F16C runs differ only in how a result is
;     parked in a callee-saved register between calls: a 32-bit movl with
;     byte/word fixup enabled, a 16-bit movw with it disabled.
;   - x86_64 with F16C: a single vcvtps2ph $4 with a memory destination.
;   - i686: four __gnu_f2h_ieee calls with the lane passed on the stack.
551define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
552; BWON-NOF16C-LABEL: test_trunc32_vec4:
553; BWON-NOF16C:       # %bb.0:
554; BWON-NOF16C-NEXT:    pushq %rbp
555; BWON-NOF16C-NEXT:    pushq %r15
556; BWON-NOF16C-NEXT:    pushq %r14
557; BWON-NOF16C-NEXT:    pushq %rbx
558; BWON-NOF16C-NEXT:    subq $24, %rsp
559; BWON-NOF16C-NEXT:    movq %rdi, %rbx
560; BWON-NOF16C-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
561; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
562; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
563; BWON-NOF16C-NEXT:    movl %eax, %r14d
564; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
565; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
566; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
567; BWON-NOF16C-NEXT:    movl %eax, %r15d
568; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
569; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
570; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
571; BWON-NOF16C-NEXT:    movl %eax, %ebp
572; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
573; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
574; BWON-NOF16C-NEXT:    movw %ax, (%rbx)
575; BWON-NOF16C-NEXT:    movw %bp, 6(%rbx)
576; BWON-NOF16C-NEXT:    movw %r15w, 4(%rbx)
577; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
578; BWON-NOF16C-NEXT:    addq $24, %rsp
579; BWON-NOF16C-NEXT:    popq %rbx
580; BWON-NOF16C-NEXT:    popq %r14
581; BWON-NOF16C-NEXT:    popq %r15
582; BWON-NOF16C-NEXT:    popq %rbp
583; BWON-NOF16C-NEXT:    retq
584;
585; BWOFF-LABEL: test_trunc32_vec4:
586; BWOFF:       # %bb.0:
587; BWOFF-NEXT:    pushq %rbp
588; BWOFF-NEXT:    pushq %r15
589; BWOFF-NEXT:    pushq %r14
590; BWOFF-NEXT:    pushq %rbx
591; BWOFF-NEXT:    subq $24, %rsp
592; BWOFF-NEXT:    movq %rdi, %rbx
593; BWOFF-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
594; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
595; BWOFF-NEXT:    callq __gnu_f2h_ieee
596; BWOFF-NEXT:    movw %ax, %r14w
597; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
598; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
599; BWOFF-NEXT:    callq __gnu_f2h_ieee
600; BWOFF-NEXT:    movw %ax, %r15w
601; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
602; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
603; BWOFF-NEXT:    callq __gnu_f2h_ieee
604; BWOFF-NEXT:    movw %ax, %bp
605; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
606; BWOFF-NEXT:    callq __gnu_f2h_ieee
607; BWOFF-NEXT:    movw %ax, (%rbx)
608; BWOFF-NEXT:    movw %bp, 6(%rbx)
609; BWOFF-NEXT:    movw %r15w, 4(%rbx)
610; BWOFF-NEXT:    movw %r14w, 2(%rbx)
611; BWOFF-NEXT:    addq $24, %rsp
612; BWOFF-NEXT:    popq %rbx
613; BWOFF-NEXT:    popq %r14
614; BWOFF-NEXT:    popq %r15
615; BWOFF-NEXT:    popq %rbp
616; BWOFF-NEXT:    retq
617;
618; BWON-F16C-LABEL: test_trunc32_vec4:
619; BWON-F16C:       # %bb.0:
620; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, (%rdi)
621; BWON-F16C-NEXT:    retq
622;
623; CHECK-I686-LABEL: test_trunc32_vec4:
624; CHECK-I686:       # %bb.0:
625; CHECK-I686-NEXT:    pushl %ebp
626; CHECK-I686-NEXT:    pushl %ebx
627; CHECK-I686-NEXT:    pushl %edi
628; CHECK-I686-NEXT:    pushl %esi
629; CHECK-I686-NEXT:    subl $44, %esp
630; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
631; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
632; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
633; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
634; CHECK-I686-NEXT:    movss %xmm1, (%esp)
635; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
636; CHECK-I686-NEXT:    movw %ax, %si
637; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
638; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
639; CHECK-I686-NEXT:    movss %xmm0, (%esp)
640; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
641; CHECK-I686-NEXT:    movw %ax, %di
642; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
643; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
644; CHECK-I686-NEXT:    movss %xmm0, (%esp)
645; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
646; CHECK-I686-NEXT:    movw %ax, %bx
647; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
648; CHECK-I686-NEXT:    movss %xmm0, (%esp)
649; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
650; CHECK-I686-NEXT:    movw %ax, (%ebp)
651; CHECK-I686-NEXT:    movw %bx, 6(%ebp)
652; CHECK-I686-NEXT:    movw %di, 4(%ebp)
653; CHECK-I686-NEXT:    movw %si, 2(%ebp)
654; CHECK-I686-NEXT:    addl $44, %esp
655; CHECK-I686-NEXT:    popl %esi
656; CHECK-I686-NEXT:    popl %edi
657; CHECK-I686-NEXT:    popl %ebx
658; CHECK-I686-NEXT:    popl %ebp
659; CHECK-I686-NEXT:    retl
660  %v = fptrunc <4 x float> %a to <4 x half>
661  store <4 x half> %v, <4 x half>* %p
662  ret void
663}
664
; Truncates the <4 x double> argument %a to <4 x half> (fptrunc) and stores
; the result to %p.
; Every configuration, including the one with F16C enabled, is expected to
; scalarize the operation into four __truncdfhf2 calls followed by four
; movw stores:
;   - x86_64 without F16C: the input arrives in xmm0 / xmm1; both are
;     spilled and the high lanes are extracted with movhlps.  As in
;     test_trunc32_vec4, results are parked between calls with movl when
;     byte/word fixup is enabled and with movw when it is disabled.
;   - x86_64 with F16C: the input arrives in ymm0; lanes are extracted with
;     vpermilpd / vextractf128, and vzeroupper is expected before three of
;     the four calls.
;   - i686: each lane is passed on the stack using movlps / movhps.
665define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
666; BWON-NOF16C-LABEL: test_trunc64_vec4:
667; BWON-NOF16C:       # %bb.0:
668; BWON-NOF16C-NEXT:    pushq %rbp
669; BWON-NOF16C-NEXT:    pushq %r15
670; BWON-NOF16C-NEXT:    pushq %r14
671; BWON-NOF16C-NEXT:    pushq %rbx
672; BWON-NOF16C-NEXT:    subq $40, %rsp
673; BWON-NOF16C-NEXT:    movq %rdi, %rbx
674; BWON-NOF16C-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
675; BWON-NOF16C-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
676; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
677; BWON-NOF16C-NEXT:    callq __truncdfhf2
678; BWON-NOF16C-NEXT:    movl %eax, %r14d
679; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
680; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
681; BWON-NOF16C-NEXT:    callq __truncdfhf2
682; BWON-NOF16C-NEXT:    movl %eax, %r15d
683; BWON-NOF16C-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
684; BWON-NOF16C-NEXT:    callq __truncdfhf2
685; BWON-NOF16C-NEXT:    movl %eax, %ebp
686; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
687; BWON-NOF16C-NEXT:    callq __truncdfhf2
688; BWON-NOF16C-NEXT:    movw %ax, 4(%rbx)
689; BWON-NOF16C-NEXT:    movw %bp, (%rbx)
690; BWON-NOF16C-NEXT:    movw %r15w, 6(%rbx)
691; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
692; BWON-NOF16C-NEXT:    addq $40, %rsp
693; BWON-NOF16C-NEXT:    popq %rbx
694; BWON-NOF16C-NEXT:    popq %r14
695; BWON-NOF16C-NEXT:    popq %r15
696; BWON-NOF16C-NEXT:    popq %rbp
697; BWON-NOF16C-NEXT:    retq
698;
699; BWOFF-LABEL: test_trunc64_vec4:
700; BWOFF:       # %bb.0:
701; BWOFF-NEXT:    pushq %rbp
702; BWOFF-NEXT:    pushq %r15
703; BWOFF-NEXT:    pushq %r14
704; BWOFF-NEXT:    pushq %rbx
705; BWOFF-NEXT:    subq $40, %rsp
706; BWOFF-NEXT:    movq %rdi, %rbx
707; BWOFF-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
708; BWOFF-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
709; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
710; BWOFF-NEXT:    callq __truncdfhf2
711; BWOFF-NEXT:    movw %ax, %r14w
712; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
713; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
714; BWOFF-NEXT:    callq __truncdfhf2
715; BWOFF-NEXT:    movw %ax, %r15w
716; BWOFF-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
717; BWOFF-NEXT:    callq __truncdfhf2
718; BWOFF-NEXT:    movw %ax, %bp
719; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
720; BWOFF-NEXT:    callq __truncdfhf2
721; BWOFF-NEXT:    movw %ax, 4(%rbx)
722; BWOFF-NEXT:    movw %bp, (%rbx)
723; BWOFF-NEXT:    movw %r15w, 6(%rbx)
724; BWOFF-NEXT:    movw %r14w, 2(%rbx)
725; BWOFF-NEXT:    addq $40, %rsp
726; BWOFF-NEXT:    popq %rbx
727; BWOFF-NEXT:    popq %r14
728; BWOFF-NEXT:    popq %r15
729; BWOFF-NEXT:    popq %rbp
730; BWOFF-NEXT:    retq
731;
732; BWON-F16C-LABEL: test_trunc64_vec4:
733; BWON-F16C:       # %bb.0:
734; BWON-F16C-NEXT:    pushq %rbp
735; BWON-F16C-NEXT:    pushq %r15
736; BWON-F16C-NEXT:    pushq %r14
737; BWON-F16C-NEXT:    pushq %rbx
738; BWON-F16C-NEXT:    subq $88, %rsp
739; BWON-F16C-NEXT:    movq %rdi, %rbx
740; BWON-F16C-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
741; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
742; BWON-F16C-NEXT:    vzeroupper
743; BWON-F16C-NEXT:    callq __truncdfhf2
744; BWON-F16C-NEXT:    movl %eax, %r14d
745; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
746; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
747; BWON-F16C-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
748; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
749; BWON-F16C-NEXT:    vzeroupper
750; BWON-F16C-NEXT:    callq __truncdfhf2
751; BWON-F16C-NEXT:    movl %eax, %r15d
752; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
753; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
754; BWON-F16C-NEXT:    vzeroupper
755; BWON-F16C-NEXT:    callq __truncdfhf2
756; BWON-F16C-NEXT:    movl %eax, %ebp
757; BWON-F16C-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
758; BWON-F16C-NEXT:    callq __truncdfhf2
759; BWON-F16C-NEXT:    movw %ax, 4(%rbx)
760; BWON-F16C-NEXT:    movw %bp, (%rbx)
761; BWON-F16C-NEXT:    movw %r15w, 6(%rbx)
762; BWON-F16C-NEXT:    movw %r14w, 2(%rbx)
763; BWON-F16C-NEXT:    addq $88, %rsp
764; BWON-F16C-NEXT:    popq %rbx
765; BWON-F16C-NEXT:    popq %r14
766; BWON-F16C-NEXT:    popq %r15
767; BWON-F16C-NEXT:    popq %rbp
768; BWON-F16C-NEXT:    retq
769;
770; CHECK-I686-LABEL: test_trunc64_vec4:
771; CHECK-I686:       # %bb.0:
772; CHECK-I686-NEXT:    pushl %ebp
773; CHECK-I686-NEXT:    pushl %ebx
774; CHECK-I686-NEXT:    pushl %edi
775; CHECK-I686-NEXT:    pushl %esi
776; CHECK-I686-NEXT:    subl $60, %esp
777; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
778; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
779; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
780; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
781; CHECK-I686-NEXT:    calll __truncdfhf2
782; CHECK-I686-NEXT:    movw %ax, %si
783; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
784; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
785; CHECK-I686-NEXT:    calll __truncdfhf2
786; CHECK-I686-NEXT:    movw %ax, %di
787; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
788; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
789; CHECK-I686-NEXT:    calll __truncdfhf2
790; CHECK-I686-NEXT:    movw %ax, %bx
791; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
792; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
793; CHECK-I686-NEXT:    calll __truncdfhf2
794; CHECK-I686-NEXT:    movw %ax, 6(%ebp)
795; CHECK-I686-NEXT:    movw %bx, 4(%ebp)
796; CHECK-I686-NEXT:    movw %di, 2(%ebp)
797; CHECK-I686-NEXT:    movw %si, (%ebp)
798; CHECK-I686-NEXT:    addl $60, %esp
799; CHECK-I686-NEXT:    popl %esi
800; CHECK-I686-NEXT:    popl %edi
801; CHECK-I686-NEXT:    popl %ebx
802; CHECK-I686-NEXT:    popl %ebp
803; CHECK-I686-NEXT:    retl
804  %v = fptrunc <4 x double> %a to <4 x half>
805  store <4 x half> %v, <4 x half>* %p
806  ret void
807}
808
; Declaration only; the test function below calls it and truncates its float
; result to half.
809declare float @test_floatret();
810
811; On i686, if SSE2 is available, the return value from test_floatret is loaded
812; to f80 and then rounded to f32.  The DAG combiner should not combine this
813; fp_round and the subsequent fptrunc from float to half.
;
; Lowering pinned by the assertions below, per RUN configuration:
;   - x86-64 without F16C: call test_floatret, then the __gnu_f2h_ieee libcall.
;   - x86-64 with F16C: call test_floatret, then an inline vcvtps2ph followed
;     by a vmovd of the result into eax.
;   - i686 with SSE2: the returned value is stored as a 32-bit float with
;     fstps to the top of the stack and then passed to __gnu_f2h_ieee.
814define half @test_f80trunc_nodagcombine() #0 {
815; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
816; CHECK-LIBCALL:       # %bb.0:
817; CHECK-LIBCALL-NEXT:    pushq %rax
818; CHECK-LIBCALL-NEXT:    callq test_floatret
819; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
820; CHECK-LIBCALL-NEXT:    popq %rcx
821; CHECK-LIBCALL-NEXT:    retq
822;
823; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
824; BWON-F16C:       # %bb.0:
825; BWON-F16C-NEXT:    pushq %rax
826; BWON-F16C-NEXT:    callq test_floatret
827; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
828; BWON-F16C-NEXT:    vmovd %xmm0, %eax
829; BWON-F16C-NEXT:    # kill: def $ax killed $ax killed $eax
830; BWON-F16C-NEXT:    popq %rcx
831; BWON-F16C-NEXT:    retq
832;
833; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
834; CHECK-I686:       # %bb.0:
835; CHECK-I686-NEXT:    subl $12, %esp
836; CHECK-I686-NEXT:    calll test_floatret
837; CHECK-I686-NEXT:    fstps (%esp)
838; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
839; CHECK-I686-NEXT:    addl $12, %esp
840; CHECK-I686-NEXT:    retl
841  %1 = call float @test_floatret()
842  %2 = fptrunc float %1 to half
843  ret half %2
844}
845
846
847
848
; Converts the i32 argument %a to half (sitofp), adds it to the half loaded
; from %b, and returns the sum extended to float.
;
; Lowering pinned by the assertions below, per RUN configuration:
;   - x86-64 without F16C: every half<->float step is a __gnu_f2h_ieee or
;     __gnu_h2f_ieee libcall; the final extension to float is emitted as a
;     tail call (jmp) after the frame has been torn down.
;   - x86-64 with F16C: the same steps are done inline with vcvtps2ph and
;     vcvtph2ps around a single vaddss.
;   - i686 with SSE2: libcalls with stack-passed arguments; float results come
;     back through fstps and the add itself is an SSE addss.
;
; NOTE(review): in the x86-64 libcall assertions the tail jump is spelled
; with an @PLT suffix while the plain calls to the same helpers are not.
; These lines are autogenerated; if that looks stale, regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
849define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
850; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
851; CHECK-LIBCALL:       # %bb.0:
852; CHECK-LIBCALL-NEXT:    pushq %rbx
853; CHECK-LIBCALL-NEXT:    subq $16, %rsp
854; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %ebx
855; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
856; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
857; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
858; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
859; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
860; CHECK-LIBCALL-NEXT:    movl %ebx, %edi
861; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
862; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
863; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
864; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
865; CHECK-LIBCALL-NEXT:    addq $16, %rsp
866; CHECK-LIBCALL-NEXT:    popq %rbx
867; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
868;
869; BWON-F16C-LABEL: test_sitofp_fadd_i32:
870; BWON-F16C:       # %bb.0:
871; BWON-F16C-NEXT:    movzwl (%rsi), %eax
872; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
873; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
874; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
875; BWON-F16C-NEXT:    vmovd %eax, %xmm1
876; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
877; BWON-F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
878; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
879; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
880; BWON-F16C-NEXT:    retq
881;
882; CHECK-I686-LABEL: test_sitofp_fadd_i32:
883; CHECK-I686:       # %bb.0:
884; CHECK-I686-NEXT:    pushl %edi
885; CHECK-I686-NEXT:    pushl %esi
886; CHECK-I686-NEXT:    subl $20, %esp
887; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
888; CHECK-I686-NEXT:    movzwl (%eax), %edi
889; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
890; CHECK-I686-NEXT:    movss %xmm0, (%esp)
891; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
892; CHECK-I686-NEXT:    movw %ax, %si
893; CHECK-I686-NEXT:    movl %edi, (%esp)
894; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
895; CHECK-I686-NEXT:    movzwl %si, %eax
896; CHECK-I686-NEXT:    movl %eax, (%esp)
897; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
898; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
899; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
900; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
901; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
902; CHECK-I686-NEXT:    movss %xmm0, (%esp)
903; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
904; CHECK-I686-NEXT:    movzwl %ax, %eax
905; CHECK-I686-NEXT:    movl %eax, (%esp)
906; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
907; CHECK-I686-NEXT:    addl $20, %esp
908; CHECK-I686-NEXT:    popl %esi
909; CHECK-I686-NEXT:    popl %edi
910; CHECK-I686-NEXT:    retl
; The half addition below is what forces the round trips through float seen
; in the assertions above: both operands are extended, added as float, and the
; sum is truncated back to half before the final fpext.
911  %tmp0 = load half, half* %b
912  %tmp1 = sitofp i32 %a to half
913  %tmp2 = fadd half %tmp0, %tmp1
914  %tmp3 = fpext half %tmp2 to float
915  ret float %tmp3
916}
917
; Regression test; the name presumably refers to LLVM bug report 40273
; (TODO confirm against the bug tracker).
;
; Compares the half argument with zero using the unordered-or-not-equal
; predicate and converts the resulting i1 to half with uitofp.
;
; In all three configurations the assertions below show the argument being
; extended to float (libcall or vcvtph2ps), compared against a zeroed register
; with ucomiss/vucomiss, and the result chosen between 0 and 0x3C00 (the half
; bit pattern of 1.0) with a cmovne/cmovp pair; the cmovp covers the unordered
; (NaN) outcome of the compare. No float->half conversion of the result is
; emitted.
918define half @PR40273(half) #0 {
919; CHECK-LIBCALL-LABEL: PR40273:
920; CHECK-LIBCALL:       # %bb.0:
921; CHECK-LIBCALL-NEXT:    pushq %rax
922; CHECK-LIBCALL-NEXT:    movzwl %di, %edi
923; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
924; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
925; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
926; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
927; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
928; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
929; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
930; CHECK-LIBCALL-NEXT:    # kill: def $ax killed $ax killed $eax
931; CHECK-LIBCALL-NEXT:    popq %rcx
932; CHECK-LIBCALL-NEXT:    retq
933;
934; BWON-F16C-LABEL: PR40273:
935; BWON-F16C:       # %bb.0:
936; BWON-F16C-NEXT:    movzwl %di, %eax
937; BWON-F16C-NEXT:    vmovd %eax, %xmm0
938; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
939; BWON-F16C-NEXT:    xorl %eax, %eax
940; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
941; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
942; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
943; BWON-F16C-NEXT:    cmovnel %ecx, %eax
944; BWON-F16C-NEXT:    cmovpl %ecx, %eax
945; BWON-F16C-NEXT:    # kill: def $ax killed $ax killed $eax
946; BWON-F16C-NEXT:    retq
947;
948; CHECK-I686-LABEL: PR40273:
949; CHECK-I686:       # %bb.0:
950; CHECK-I686-NEXT:    subl $12, %esp
951; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
952; CHECK-I686-NEXT:    movl %eax, (%esp)
953; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
954; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
955; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
956; CHECK-I686-NEXT:    xorl %eax, %eax
957; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
958; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
959; CHECK-I686-NEXT:    movl $15360, %ecx # imm = 0x3C00
960; CHECK-I686-NEXT:    cmovnel %ecx, %eax
961; CHECK-I686-NEXT:    cmovpl %ecx, %eax
962; CHECK-I686-NEXT:    # kill: def $ax killed $ax killed $eax
963; CHECK-I686-NEXT:    addl $12, %esp
964; CHECK-I686-NEXT:    retl
; %0 is the unnamed half parameter; 0xH0000 is the half constant +0.0.
965  %2 = fcmp une half %0, 0xH0000
966  %3 = uitofp i1 %2 to half
967  ret half %3
968}
969
; Attribute group #0, referenced by the function definitions above: marks them
; nounwind.
970attributes #0 = { nounwind }
971