1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3
; Vector math intrinsics over <4 x double> used by the first group of test
; functions in this file (@foo, @goo, @moo, @zoo): sine, cosine, pow with a
; vector exponent, and powi with a single i32 exponent.
4declare <4 x double> @llvm.sin.v4f64(<4 x double> %p)
5declare <4 x double> @llvm.cos.v4f64(<4 x double> %p)
6declare <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q)
7declare <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32)
8
; @foo: applies llvm.sin.v4f64 to %p and returns the result.
; The expected assembly performs four separate `callq sin` calls rather than
; one vector operation. The input arrives as two 128-bit halves in %xmm0 and
; %xmm1, which are spilled to the stack up front; for each half the low lane
; is passed as-is and the high lane is moved down with movhlps. Each pair of
; scalar results is rejoined with movlhps, and the two rebuilt halves are
; returned in %xmm0/%xmm1. The .cfi directives track the 56-byte frame.
9define <4 x double> @foo(<4 x double> %p)
10; CHECK-LABEL: foo:
11; CHECK:       # %bb.0:
12; CHECK-NEXT:    subq $56, %rsp
13; CHECK-NEXT:    .cfi_def_cfa_offset 64
14; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
15; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
16; CHECK-NEXT:    callq sin
17; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
18; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
19; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
20; CHECK-NEXT:    callq sin
21; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
22; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
23; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
24; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
25; CHECK-NEXT:    callq sin
26; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
27; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
28; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
29; CHECK-NEXT:    callq sin
30; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
31; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
32; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
33; CHECK-NEXT:    addq $56, %rsp
34; CHECK-NEXT:    .cfi_def_cfa_offset 8
35; CHECK-NEXT:    retq
36{
37  %t = call <4 x double> @llvm.sin.v4f64(<4 x double> %p)
38  ret <4 x double> %t
39}
; @goo: applies llvm.cos.v4f64 to %p and returns the result.
; The expected assembly has the same shape as the sine test in this file, but
; with four `callq cos` calls: both 128-bit input halves (%xmm0, %xmm1) are
; spilled, each lane is passed to cos individually (high lanes are brought
; down with movhlps), and the scalar results are paired back up with movlhps
; before being returned in %xmm0/%xmm1.
40define <4 x double> @goo(<4 x double> %p)
41; CHECK-LABEL: goo:
42; CHECK:       # %bb.0:
43; CHECK-NEXT:    subq $56, %rsp
44; CHECK-NEXT:    .cfi_def_cfa_offset 64
45; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
46; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
47; CHECK-NEXT:    callq cos
48; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
49; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
50; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
51; CHECK-NEXT:    callq cos
52; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
53; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
54; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
55; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
56; CHECK-NEXT:    callq cos
57; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
58; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
59; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
60; CHECK-NEXT:    callq cos
61; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
62; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
63; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
64; CHECK-NEXT:    addq $56, %rsp
65; CHECK-NEXT:    .cfi_def_cfa_offset 8
66; CHECK-NEXT:    retq
67{
68  %t = call <4 x double> @llvm.cos.v4f64(<4 x double> %p)
69  ret <4 x double> %t
70}
; @moo: computes llvm.pow.v4f64(%p, %q) and returns the result.
; The expected assembly issues four `callq pow` calls. The base vector arrives
; in %xmm0/%xmm1 and the exponent vector in %xmm2/%xmm3; all four registers
; are spilled into an 88-byte frame. For every call the matching base lane is
; placed in %xmm0 and the matching exponent lane in %xmm1 (high lanes of both
; operands are brought down with movhlps). Scalar results are paired with
; movlhps and returned in %xmm0/%xmm1.
71define <4 x double> @moo(<4 x double> %p, <4 x double> %q)
72; CHECK-LABEL: moo:
73; CHECK:       # %bb.0:
74; CHECK-NEXT:    subq $88, %rsp
75; CHECK-NEXT:    .cfi_def_cfa_offset 96
76; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
77; CHECK-NEXT:    movaps %xmm2, (%rsp) # 16-byte Spill
78; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
79; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
80; CHECK-NEXT:    movaps %xmm2, %xmm1
81; CHECK-NEXT:    callq pow
82; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
83; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
84; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
85; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
86; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
87; CHECK-NEXT:    callq pow
88; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
89; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
90; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
91; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
93; CHECK-NEXT:    callq pow
94; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
95; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
96; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
97; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
98; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
99; CHECK-NEXT:    callq pow
100; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
101; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
102; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
103; CHECK-NEXT:    addq $88, %rsp
104; CHECK-NEXT:    .cfi_def_cfa_offset 8
105; CHECK-NEXT:    retq
106{
107  %t = call <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q)
108  ret <4 x double> %t
109}
; @zoo: computes llvm.powi.v4f64(%p, %q) with one shared i32 exponent and
; returns the result.
; The expected assembly issues four `callq __powidf2` calls. The exponent
; arrives in %edi and is copied to %ebx (saved with pushq/popq, hence the
; extra .cfi_offset) so it survives the calls; it is copied back into %edi
; before the second, third and fourth call. The vector lanes are handled like
; the other <4 x double> tests in this file: spill both halves, use movhlps
; for the high lanes, and rejoin results with movlhps into %xmm0/%xmm1.
110define <4 x double> @zoo(<4 x double> %p, i32 %q)
111; CHECK-LABEL: zoo:
112; CHECK:       # %bb.0:
113; CHECK-NEXT:    pushq %rbx
114; CHECK-NEXT:    .cfi_def_cfa_offset 16
115; CHECK-NEXT:    subq $48, %rsp
116; CHECK-NEXT:    .cfi_def_cfa_offset 64
117; CHECK-NEXT:    .cfi_offset %rbx, -16
118; CHECK-NEXT:    movl %edi, %ebx
119; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
120; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
121; CHECK-NEXT:    callq __powidf2
122; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
123; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
124; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
125; CHECK-NEXT:    movl %ebx, %edi
126; CHECK-NEXT:    callq __powidf2
127; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
128; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
129; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
130; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
131; CHECK-NEXT:    movl %ebx, %edi
132; CHECK-NEXT:    callq __powidf2
133; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
134; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
135; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
136; CHECK-NEXT:    movl %ebx, %edi
137; CHECK-NEXT:    callq __powidf2
138; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
139; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
140; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
141; CHECK-NEXT:    addq $48, %rsp
142; CHECK-NEXT:    .cfi_def_cfa_offset 16
143; CHECK-NEXT:    popq %rbx
144; CHECK-NEXT:    .cfi_def_cfa_offset 8
145; CHECK-NEXT:    retq
146{
147  %t = call <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32 %q)
148  ret <4 x double> %t
149}
150
151
; Vector math intrinsics over the odd-sized type <9 x double>, used by the
; second group of test functions in this file (@a, @b, @c): exp, pow with a
; vector exponent, and powi with a single i32 exponent.
152declare <9 x double> @llvm.exp.v9f64(<9 x double> %a)
153declare <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b)
154declare <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32)
155
; @a: loads a <9 x double> from %p, applies llvm.exp.v9f64, and stores the
; result back to %p. The function is declared nounwind, and the expected
; assembly contains no .cfi directives.
; The expected assembly issues nine `callq exp` calls. The pointer is kept in
; %rbx across the calls. Elements 0-7 are read as four 16-byte chunks at
; offsets 0/16/32/48 and spilled; each chunk yields two calls (low lane, then
; high lane via movhlps) whose results are rejoined with movlhps. The ninth
; element is read and spilled as a lone scalar with movsd, processed by the
; last call, and written to offset 64 before the four rebuilt chunks are
; stored back.
156define void @a(<9 x double>* %p) nounwind {
157; CHECK-LABEL: a:
158; CHECK:       # %bb.0:
159; CHECK-NEXT:    pushq %rbx
160; CHECK-NEXT:    subq $96, %rsp
161; CHECK-NEXT:    movq %rdi, %rbx
162; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
163; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
164; CHECK-NEXT:    movaps (%rdi), %xmm0
165; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
166; CHECK-NEXT:    movaps 16(%rdi), %xmm0
167; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
168; CHECK-NEXT:    movaps 32(%rdi), %xmm0
169; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
170; CHECK-NEXT:    movaps 48(%rdi), %xmm0
171; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
172; CHECK-NEXT:    callq exp
173; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
174; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
175; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
176; CHECK-NEXT:    callq exp
177; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
178; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
179; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
180; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
181; CHECK-NEXT:    callq exp
182; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
183; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
184; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
185; CHECK-NEXT:    callq exp
186; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
187; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
188; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
189; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
190; CHECK-NEXT:    callq exp
191; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
192; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
193; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
194; CHECK-NEXT:    callq exp
195; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
196; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
197; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
198; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
199; CHECK-NEXT:    callq exp
200; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
201; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
202; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
203; CHECK-NEXT:    callq exp
204; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
205; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
206; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
207; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
208; CHECK-NEXT:    # xmm0 = mem[0],zero
209; CHECK-NEXT:    callq exp
210; CHECK-NEXT:    movsd %xmm0, 64(%rbx)
211; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
212; CHECK-NEXT:    movaps %xmm0, (%rbx)
213; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
214; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
215; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
216; CHECK-NEXT:    movaps %xmm0, 32(%rbx)
217; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
218; CHECK-NEXT:    movaps %xmm0, 48(%rbx)
219; CHECK-NEXT:    addq $96, %rsp
220; CHECK-NEXT:    popq %rbx
221; CHECK-NEXT:    retq
222  %a = load <9 x double>, <9 x double>* %p
223  %r = call <9 x double> @llvm.exp.v9f64(<9 x double> %a)
224  store <9 x double> %r, <9 x double>* %p
225  ret void
226}
; @b: loads <9 x double> values from %p and %q, computes
; llvm.pow.v9f64(%a, %b), and stores the result back to %p. The function is
; declared nounwind, and the expected assembly contains no .cfi directives.
; The expected assembly issues nine `callq pow` calls. The destination pointer
; is kept in %rbx. Both operands are read up front (four 16-byte chunks plus
; one movsd scalar each, from %rdi and %rsi) and spilled into a 160-byte
; frame. For every call the base lane goes in %xmm0 and the exponent lane in
; %xmm1, with movhlps selecting high lanes; results are rejoined pairwise with
; movlhps. The ninth result is stored at offset 64, followed by the four
; rebuilt 16-byte chunks.
227define void @b(<9 x double>* %p, <9 x double>* %q) nounwind {
228; CHECK-LABEL: b:
229; CHECK:       # %bb.0:
230; CHECK-NEXT:    pushq %rbx
231; CHECK-NEXT:    subq $160, %rsp
232; CHECK-NEXT:    movq %rdi, %rbx
233; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
234; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
235; CHECK-NEXT:    movaps (%rdi), %xmm0
236; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
237; CHECK-NEXT:    movaps 16(%rdi), %xmm0
238; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
239; CHECK-NEXT:    movaps 32(%rdi), %xmm0
240; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
241; CHECK-NEXT:    movaps 48(%rdi), %xmm2
242; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
243; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
244; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
245; CHECK-NEXT:    movaps (%rsi), %xmm0
246; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
247; CHECK-NEXT:    movaps 16(%rsi), %xmm0
248; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
249; CHECK-NEXT:    movaps 32(%rsi), %xmm0
250; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
251; CHECK-NEXT:    movaps 48(%rsi), %xmm1
252; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
253; CHECK-NEXT:    movaps %xmm2, %xmm0
254; CHECK-NEXT:    callq pow
255; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
256; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
257; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
258; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
259; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
260; CHECK-NEXT:    callq pow
261; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
262; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
263; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
264; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
265; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
266; CHECK-NEXT:    callq pow
267; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
268; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
269; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
270; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
271; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
272; CHECK-NEXT:    callq pow
273; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
274; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
275; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
276; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
277; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
278; CHECK-NEXT:    callq pow
279; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
280; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
281; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
282; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
283; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
284; CHECK-NEXT:    callq pow
285; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
286; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
287; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
288; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
289; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
290; CHECK-NEXT:    callq pow
291; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
292; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
293; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
294; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
295; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
296; CHECK-NEXT:    callq pow
297; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
298; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
299; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
300; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
301; CHECK-NEXT:    # xmm0 = mem[0],zero
302; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
303; CHECK-NEXT:    # xmm1 = mem[0],zero
304; CHECK-NEXT:    callq pow
305; CHECK-NEXT:    movsd %xmm0, 64(%rbx)
306; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
307; CHECK-NEXT:    movaps %xmm0, (%rbx)
308; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
309; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
310; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
311; CHECK-NEXT:    movaps %xmm0, 32(%rbx)
312; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
313; CHECK-NEXT:    movaps %xmm0, 48(%rbx)
314; CHECK-NEXT:    addq $160, %rsp
315; CHECK-NEXT:    popq %rbx
316; CHECK-NEXT:    retq
317  %a = load <9 x double>, <9 x double>* %p
318  %b = load <9 x double>, <9 x double>* %q
319  %r = call <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b)
320  store <9 x double> %r, <9 x double>* %p
321  ret void
322}
; @c: loads a <9 x double> from %p, computes llvm.powi.v9f64(%a, %n) with one
; shared i32 exponent, and stores the result back to %p. The function is
; declared nounwind, and the expected assembly contains no .cfi directives.
; The expected assembly issues nine `callq __powidf2` calls. Two registers are
; saved with pushq/popq and reused across the calls: %rbx holds the pointer
; and %ebp holds the exponent, which is copied into %edi before each call (the
; first call copies it straight from %esi). Elements 0-7 are processed as four
; 16-byte chunks (low lane, then high lane via movhlps, rejoined with
; movlhps); the ninth element is a lone movsd scalar whose result is stored at
; offset 64, followed by the four rebuilt chunks.
323define void @c(<9 x double>* %p, i32 %n) nounwind {
324; CHECK-LABEL: c:
325; CHECK:       # %bb.0:
326; CHECK-NEXT:    pushq %rbp
327; CHECK-NEXT:    pushq %rbx
328; CHECK-NEXT:    subq $104, %rsp
329; CHECK-NEXT:    movl %esi, %ebp
330; CHECK-NEXT:    movq %rdi, %rbx
331; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
332; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
333; CHECK-NEXT:    movaps (%rdi), %xmm0
334; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
335; CHECK-NEXT:    movaps 16(%rdi), %xmm0
336; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
337; CHECK-NEXT:    movaps 32(%rdi), %xmm0
338; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
339; CHECK-NEXT:    movaps 48(%rdi), %xmm0
340; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
341; CHECK-NEXT:    movl %esi, %edi
342; CHECK-NEXT:    callq __powidf2
343; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
344; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
345; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
346; CHECK-NEXT:    movl %ebp, %edi
347; CHECK-NEXT:    callq __powidf2
348; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
349; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
350; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
351; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
352; CHECK-NEXT:    movl %ebp, %edi
353; CHECK-NEXT:    callq __powidf2
354; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
355; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
356; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
357; CHECK-NEXT:    movl %ebp, %edi
358; CHECK-NEXT:    callq __powidf2
359; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
360; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
361; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
362; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
363; CHECK-NEXT:    movl %ebp, %edi
364; CHECK-NEXT:    callq __powidf2
365; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
366; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
367; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
368; CHECK-NEXT:    movl %ebp, %edi
369; CHECK-NEXT:    callq __powidf2
370; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
371; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
372; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
373; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
374; CHECK-NEXT:    movl %ebp, %edi
375; CHECK-NEXT:    callq __powidf2
376; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
377; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
378; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
379; CHECK-NEXT:    movl %ebp, %edi
380; CHECK-NEXT:    callq __powidf2
381; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
382; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
383; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
384; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
385; CHECK-NEXT:    # xmm0 = mem[0],zero
386; CHECK-NEXT:    movl %ebp, %edi
387; CHECK-NEXT:    callq __powidf2
388; CHECK-NEXT:    movsd %xmm0, 64(%rbx)
389; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
390; CHECK-NEXT:    movaps %xmm0, (%rbx)
391; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
392; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
393; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
394; CHECK-NEXT:    movaps %xmm0, 32(%rbx)
395; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
396; CHECK-NEXT:    movaps %xmm0, 48(%rbx)
397; CHECK-NEXT:    addq $104, %rsp
398; CHECK-NEXT:    popq %rbx
399; CHECK-NEXT:    popq %rbp
400; CHECK-NEXT:    retq
401  %a = load <9 x double>, <9 x double>* %p
402  %r = call <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32 %n)
403  store <9 x double> %r, <9 x double>* %p
404  ret void
405}
406