; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64

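; Insert the first element of a <4 x float> loaded from %source into lanes 1
; and 2 of an otherwise-zero <8 x float> and store the result to %dest
; (PR15298).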
define void @PR15298(<4 x float>* nocapture %source, <8 x float>* nocapture %dest) nounwind noinline {
; SSE-32-LABEL: PR15298:
; SSE-32:       # %bb.0: # %L.entry
; SSE-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    xorps %xmm0, %xmm0
; SSE-32-NEXT:    xorps %xmm1, %xmm1
; SSE-32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,0]
; SSE-32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
; SSE-32-NEXT:    movups %xmm0, 624(%eax)
; SSE-32-NEXT:    movups %xmm1, 608(%eax)
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: PR15298:
; SSE-64:       # %bb.0: # %L.entry
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,0]
; SSE-64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
; SSE-64-NEXT:    movups %xmm0, 624(%rsi)
; SSE-64-NEXT:    movups %xmm1, 608(%rsi)
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: PR15298:
; AVX-32:       # %bb.0: # %L.entry
; AVX-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT:    vbroadcastss 304(%ecx), %xmm0
; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6,7]
; AVX-32-NEXT:    vmovups %ymm0, 608(%eax)
; AVX-32-NEXT:    vzeroupper
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: PR15298:
; AVX-64:       # %bb.0: # %L.entry
; AVX-64-NEXT:    vbroadcastss 304(%rdi), %xmm0
; AVX-64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6,7]
; AVX-64-NEXT:    vmovups %ymm0, 608(%rsi)
; AVX-64-NEXT:    vzeroupper
; AVX-64-NEXT:    retq
L.entry:
  %0 = getelementptr inbounds <4 x float>, <4 x float>* %source, i32 19
  %1 = load <4 x float>, <4 x float>* %0, align 16
  %2 = extractelement <4 x float> %1, i32 0
  %3 = insertelement <8 x float> <float 0.000000e+00, float undef, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %2, i32 2
  %4 = insertelement <8 x float> %3, float %2, i32 1
  %5 = getelementptr <8 x float>, <8 x float>* %dest, i32 19
  store <8 x float> %4, <8 x float>* %5, align 4
  ret void
}