1; RUN: opt < %s  -cost-model -analyze | FileCheck %s
; The RUN line above prints opt's cost-model analysis for this module and
; pipes it to FileCheck, which matches it against the directives embedded in
; each function below. Every function holds a single vector load or store
; whose alignment equals the element size, not the full vector size.
; The module targets 64-bit PowerPC Linux (see the triple below).
2target datalayout = "E-m:e-i64:64-n32:64"
3target triple = "powerpc64-unknown-linux-gnu"
4
; Load cost, target-cpu pwr7 (attributes #0): <16 x i8> read through %p with
; align 1 (element-size alignment only). FileCheck expects a reported cost of 2.
5define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
6entry:
7  %r = load <16 x i8>, <16 x i8>* %p, align 1
8  ret <16 x i8> %r
9
10; CHECK-LABEL: test_l_v16i8
11; CHECK: cost of 2 for instruction:   %r = load <16 x i8>, <16 x i8>* %p, align 1
12}
13
; Load cost, target-cpu pwr7 (attributes #0): <32 x i8> read through %p with
; align 1 (element-size alignment only). FileCheck expects a reported cost of 4.
14define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
15entry:
16  %r = load <32 x i8>, <32 x i8>* %p, align 1
17  ret <32 x i8> %r
18
19; CHECK-LABEL: test_l_v32i8
20; CHECK: cost of 4 for instruction:   %r = load <32 x i8>, <32 x i8>* %p, align 1
21}
22
; Load cost, target-cpu pwr7 (attributes #0): <8 x i16> read through %p with
; align 2 (element-size alignment only). FileCheck expects a reported cost of 2.
23define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
24entry:
25  %r = load <8 x i16>, <8 x i16>* %p, align 2
26  ret <8 x i16> %r
27
28; CHECK-LABEL: test_l_v8i16
29; CHECK: cost of 2 for instruction:   %r = load <8 x i16>, <8 x i16>* %p, align 2
30}
31
; Load cost, target-cpu pwr7 (attributes #0): <16 x i16> read through %p with
; align 2 (element-size alignment only). FileCheck expects a reported cost of 4.
32define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
33entry:
34  %r = load <16 x i16>, <16 x i16>* %p, align 2
35  ret <16 x i16> %r
36
37; CHECK-LABEL: test_l_v16i16
38; CHECK: cost of 4 for instruction:   %r = load <16 x i16>, <16 x i16>* %p, align 2
39}
40
; Load cost, target-cpu pwr7 (attributes #0): <4 x i32> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
41define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
42entry:
43  %r = load <4 x i32>, <4 x i32>* %p, align 4
44  ret <4 x i32> %r
45
46; CHECK-LABEL: test_l_v4i32
47; CHECK: cost of 2 for instruction:   %r = load <4 x i32>, <4 x i32>* %p, align 4
48}
49
; Load cost, target-cpu pwr7 (attributes #0): <8 x i32> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 4.
50define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
51entry:
52  %r = load <8 x i32>, <8 x i32>* %p, align 4
53  ret <8 x i32> %r
54
55; CHECK-LABEL: test_l_v8i32
56; CHECK: cost of 4 for instruction:   %r = load <8 x i32>, <8 x i32>* %p, align 4
57}
58
; Load cost, target-cpu pwr7 (attributes #0): <2 x i64> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 1.
59define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
60entry:
61  %r = load <2 x i64>, <2 x i64>* %p, align 8
62  ret <2 x i64> %r
63
64; CHECK-LABEL: test_l_v2i64
65; CHECK: cost of 1 for instruction:   %r = load <2 x i64>, <2 x i64>* %p, align 8
66}
67
; Load cost, target-cpu pwr7 (attributes #0): <4 x i64> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
68define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
69entry:
70  %r = load <4 x i64>, <4 x i64>* %p, align 8
71  ret <4 x i64> %r
72
73; CHECK-LABEL: test_l_v4i64
74; CHECK: cost of 2 for instruction:   %r = load <4 x i64>, <4 x i64>* %p, align 8
75}
76
; Load cost, target-cpu pwr7 (attributes #0): <4 x float> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
77define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
78entry:
79  %r = load <4 x float>, <4 x float>* %p, align 4
80  ret <4 x float> %r
81
82; CHECK-LABEL: test_l_v4float
83; CHECK: cost of 2 for instruction:   %r = load <4 x float>, <4 x float>* %p, align 4
84}
85
; Load cost, target-cpu pwr7 (attributes #0): <8 x float> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 4.
86define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
87entry:
88  %r = load <8 x float>, <8 x float>* %p, align 4
89  ret <8 x float> %r
90
91; CHECK-LABEL: test_l_v8float
92; CHECK: cost of 4 for instruction:   %r = load <8 x float>, <8 x float>* %p, align 4
93}
94
; Load cost, target-cpu pwr7 (attributes #0): <2 x double> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 1.
95define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
96entry:
97  %r = load <2 x double>, <2 x double>* %p, align 8
98  ret <2 x double> %r
99
100; CHECK-LABEL: test_l_v2double
101; CHECK: cost of 1 for instruction:   %r = load <2 x double>, <2 x double>* %p, align 8
102}
103
; Load cost, target-cpu pwr7 (attributes #0): <4 x double> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
104define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
105entry:
106  %r = load <4 x double>, <4 x double>* %p, align 8
107  ret <4 x double> %r
108
109; CHECK-LABEL: test_l_v4double
110; CHECK: cost of 2 for instruction:   %r = load <4 x double>, <4 x double>* %p, align 8
111}
112
; Load cost, target-cpu pwr8 (attributes #2): <16 x i8> read through %p with
; align 1 (element-size alignment only). FileCheck expects a reported cost of 1.
113define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
114entry:
115  %r = load <16 x i8>, <16 x i8>* %p, align 1
116  ret <16 x i8> %r
117
118; CHECK-LABEL: test_l_p8v16i8
119; CHECK: cost of 1 for instruction:   %r = load <16 x i8>, <16 x i8>* %p, align 1
120}
121
; Load cost, target-cpu pwr8 (attributes #2): <32 x i8> read through %p with
; align 1 (element-size alignment only). FileCheck expects a reported cost of 2.
122define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
123entry:
124  %r = load <32 x i8>, <32 x i8>* %p, align 1
125  ret <32 x i8> %r
126
127; CHECK-LABEL: test_l_p8v32i8
128; CHECK: cost of 2 for instruction:   %r = load <32 x i8>, <32 x i8>* %p, align 1
129}
130
; Load cost, target-cpu pwr8 (attributes #2): <8 x i16> read through %p with
; align 2 (element-size alignment only). FileCheck expects a reported cost of 1.
131define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
132entry:
133  %r = load <8 x i16>, <8 x i16>* %p, align 2
134  ret <8 x i16> %r
135
136; CHECK-LABEL: test_l_p8v8i16
137; CHECK: cost of 1 for instruction:   %r = load <8 x i16>, <8 x i16>* %p, align 2
138}
139
; Load cost, target-cpu pwr8 (attributes #2): <16 x i16> read through %p with
; align 2 (element-size alignment only). FileCheck expects a reported cost of 2.
140define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
141entry:
142  %r = load <16 x i16>, <16 x i16>* %p, align 2
143  ret <16 x i16> %r
144
145; CHECK-LABEL: test_l_p8v16i16
146; CHECK: cost of 2 for instruction:   %r = load <16 x i16>, <16 x i16>* %p, align 2
147}
148
; Load cost, target-cpu pwr8 (attributes #2): <4 x i32> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 1.
149define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
150entry:
151  %r = load <4 x i32>, <4 x i32>* %p, align 4
152  ret <4 x i32> %r
153
154; CHECK-LABEL: test_l_p8v4i32
155; CHECK: cost of 1 for instruction:   %r = load <4 x i32>, <4 x i32>* %p, align 4
156}
157
; Load cost, target-cpu pwr8 (attributes #2): <8 x i32> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
158define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
159entry:
160  %r = load <8 x i32>, <8 x i32>* %p, align 4
161  ret <8 x i32> %r
162
163; CHECK-LABEL: test_l_p8v8i32
164; CHECK: cost of 2 for instruction:   %r = load <8 x i32>, <8 x i32>* %p, align 4
165}
166
; Load cost, target-cpu pwr8 (attributes #2): <2 x i64> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 1.
167define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
168entry:
169  %r = load <2 x i64>, <2 x i64>* %p, align 8
170  ret <2 x i64> %r
171
172; CHECK-LABEL: test_l_p8v2i64
173; CHECK: cost of 1 for instruction:   %r = load <2 x i64>, <2 x i64>* %p, align 8
174}
175
; Load cost, target-cpu pwr8 (attributes #2): <4 x i64> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
176define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
177entry:
178  %r = load <4 x i64>, <4 x i64>* %p, align 8
179  ret <4 x i64> %r
180
181; CHECK-LABEL: test_l_p8v4i64
182; CHECK: cost of 2 for instruction:   %r = load <4 x i64>, <4 x i64>* %p, align 8
183}
184
; Load cost, target-cpu pwr8 (attributes #2): <4 x float> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 1.
185define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
186entry:
187  %r = load <4 x float>, <4 x float>* %p, align 4
188  ret <4 x float> %r
189
190; CHECK-LABEL: test_l_p8v4float
191; CHECK: cost of 1 for instruction:   %r = load <4 x float>, <4 x float>* %p, align 4
192}
193
; Load cost, target-cpu pwr8 (attributes #2): <8 x float> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
194define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
195entry:
196  %r = load <8 x float>, <8 x float>* %p, align 4
197  ret <8 x float> %r
198
199; CHECK-LABEL: test_l_p8v8float
200; CHECK: cost of 2 for instruction:   %r = load <8 x float>, <8 x float>* %p, align 4
201}
202
; Load cost, target-cpu pwr8 (attributes #2): <2 x double> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 1.
203define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
204entry:
205  %r = load <2 x double>, <2 x double>* %p, align 8
206  ret <2 x double> %r
207
208; CHECK-LABEL: test_l_p8v2double
209; CHECK: cost of 1 for instruction:   %r = load <2 x double>, <2 x double>* %p, align 8
210}
211
; Load cost, target-cpu pwr8 (attributes #2): <4 x double> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
212define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
213entry:
214  %r = load <4 x double>, <4 x double>* %p, align 8
215  ret <4 x double> %r
216
217; CHECK-LABEL: test_l_p8v4double
218; CHECK: cost of 2 for instruction:   %r = load <4 x double>, <4 x double>* %p, align 8
219}
220
; Load cost, target-cpu a2q (attributes #1): <4 x float> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
221define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
222entry:
223  %r = load <4 x float>, <4 x float>* %p, align 4
224  ret <4 x float> %r
225
226; CHECK-LABEL: test_l_qv4float
227; CHECK: cost of 2 for instruction:   %r = load <4 x float>, <4 x float>* %p, align 4
228}
229
; Load cost, target-cpu a2q (attributes #1): <8 x float> read through %p with
; align 4 (element-size alignment only). FileCheck expects a reported cost of 4.
230define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
231entry:
232  %r = load <8 x float>, <8 x float>* %p, align 4
233  ret <8 x float> %r
234
235; CHECK-LABEL: test_l_qv8float
236; CHECK: cost of 4 for instruction:   %r = load <8 x float>, <8 x float>* %p, align 4
237}
238
; Load cost, target-cpu a2q (attributes #1): <4 x double> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
239define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
240entry:
241  %r = load <4 x double>, <4 x double>* %p, align 8
242  ret <4 x double> %r
243
244; CHECK-LABEL: test_l_qv4double
245; CHECK: cost of 2 for instruction:   %r = load <4 x double>, <4 x double>* %p, align 8
246}
247
; Load cost, target-cpu a2q (attributes #1): <8 x double> read through %p with
; align 8 (element-size alignment only). FileCheck expects a reported cost of 4.
248define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
249entry:
250  %r = load <8 x double>, <8 x double>* %p, align 8
251  ret <8 x double> %r
252
253; CHECK-LABEL: test_l_qv8double
254; CHECK: cost of 4 for instruction:   %r = load <8 x double>, <8 x double>* %p, align 8
255}
256
; Store cost, target-cpu pwr7 (attributes #0): <16 x i8> %v written through %p
; with align 1 (element-size alignment only). FileCheck expects a reported cost of 1.
257define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
258entry:
259  store <16 x i8> %v, <16 x i8>* %p, align 1
260  ret void
261
262; CHECK-LABEL: test_s_v16i8
263; CHECK: cost of 1 for instruction:   store <16 x i8> %v, <16 x i8>* %p, align 1
264}
265
; Store cost, target-cpu pwr7 (attributes #0): <32 x i8> %v written through %p
; with align 1 (element-size alignment only). FileCheck expects a reported cost of 2.
266define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
267entry:
268  store <32 x i8> %v, <32 x i8>* %p, align 1
269  ret void
270
271; CHECK-LABEL: test_s_v32i8
272; CHECK: cost of 2 for instruction:   store <32 x i8> %v, <32 x i8>* %p, align 1
273}
274
; Store cost, target-cpu pwr7 (attributes #0): <8 x i16> %v written through %p
; with align 2 (element-size alignment only). FileCheck expects a reported cost of 1.
275define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
276entry:
277  store <8 x i16> %v, <8 x i16>* %p, align 2
278  ret void
279
280; CHECK-LABEL: test_s_v8i16
281; CHECK: cost of 1 for instruction:   store <8 x i16> %v, <8 x i16>* %p, align 2
282}
283
; Store cost, target-cpu pwr7 (attributes #0): <16 x i16> %v written through %p
; with align 2 (element-size alignment only). FileCheck expects a reported cost of 2.
284define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
285entry:
286  store <16 x i16> %v, <16 x i16>* %p, align 2
287  ret void
288
289; CHECK-LABEL: test_s_v16i16
290; CHECK: cost of 2 for instruction:   store <16 x i16> %v, <16 x i16>* %p, align 2
291}
292
; Store cost, target-cpu pwr7 (attributes #0): <4 x i32> %v written through %p
; with align 4 (element-size alignment only). FileCheck expects a reported cost of 1.
293define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
294entry:
295  store <4 x i32> %v, <4 x i32>* %p, align 4
296  ret void
297
298; CHECK-LABEL: test_s_v4i32
299; CHECK: cost of 1 for instruction:   store <4 x i32> %v, <4 x i32>* %p, align 4
300}
301
; Store cost, target-cpu pwr7 (attributes #0): <8 x i32> %v written through %p
; with align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
302define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
303entry:
304  store <8 x i32> %v, <8 x i32>* %p, align 4
305  ret void
306
307; CHECK-LABEL: test_s_v8i32
308; CHECK: cost of 2 for instruction:   store <8 x i32> %v, <8 x i32>* %p, align 4
309}
310
; Store cost, target-cpu pwr7 (attributes #0): <2 x i64> %v written through %p
; with align 8 (element-size alignment only). FileCheck expects a reported cost of 1.
311define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
312entry:
313  store <2 x i64> %v, <2 x i64>* %p, align 8
314  ret void
315
316; CHECK-LABEL: test_s_v2i64
317; CHECK: cost of 1 for instruction:   store <2 x i64> %v, <2 x i64>* %p, align 8
318}
319
; Store cost, target-cpu pwr7 (attributes #0): <4 x i64> %v written through %p
; with align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
320define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
321entry:
322  store <4 x i64> %v, <4 x i64>* %p, align 8
323  ret void
324
325; CHECK-LABEL: test_s_v4i64
326; CHECK: cost of 2 for instruction:   store <4 x i64> %v, <4 x i64>* %p, align 8
327}
328
; Store cost, target-cpu pwr7 (attributes #0): <4 x float> %v written through %p
; with align 4 (element-size alignment only). FileCheck expects a reported cost of 1.
329define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
330entry:
331  store <4 x float> %v, <4 x float>* %p, align 4
332  ret void
333
334; CHECK-LABEL: test_s_v4float
335; CHECK: cost of 1 for instruction:   store <4 x float> %v, <4 x float>* %p, align 4
336}
337
; Store cost, target-cpu pwr7 (attributes #0): <8 x float> %v written through %p
; with align 4 (element-size alignment only). FileCheck expects a reported cost of 2.
338define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
339entry:
340  store <8 x float> %v, <8 x float>* %p, align 4
341  ret void
342
343; CHECK-LABEL: test_s_v8float
344; CHECK: cost of 2 for instruction:   store <8 x float> %v, <8 x float>* %p, align 4
345}
346
; Store cost, target-cpu pwr7 (attributes #0): <2 x double> %v written through %p
; with align 8 (element-size alignment only). FileCheck expects a reported cost of 1.
347define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
348entry:
349  store <2 x double> %v, <2 x double>* %p, align 8
350  ret void
351
352; CHECK-LABEL: test_s_v2double
353; CHECK: cost of 1 for instruction:   store <2 x double> %v, <2 x double>* %p, align 8
354}
355
; Store cost, target-cpu pwr7 (attributes #0): <4 x double> %v written through %p
; with align 8 (element-size alignment only). FileCheck expects a reported cost of 2.
356define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
357entry:
358  store <4 x double> %v, <4 x double>* %p, align 8
359  ret void
360
361; CHECK-LABEL: test_s_v4double
362; CHECK: cost of 2 for instruction:   store <4 x double> %v, <4 x double>* %p, align 8
363}
364
; Store cost, target-cpu a2q (attributes #1): <4 x float> %v written through %p
; with align 4 (element-size alignment only). FileCheck expects a reported cost of 7.
365define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
366entry:
367  store <4 x float> %v, <4 x float>* %p, align 4
368  ret void
369
370; CHECK-LABEL: test_s_qv4float
371; CHECK: cost of 7 for instruction:   store <4 x float> %v, <4 x float>* %p, align 4
372}
373
; Store cost, target-cpu a2q (attributes #1): <8 x float> %v written through %p
; with align 4 (element-size alignment only). FileCheck expects a reported cost of 15.
374define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
375entry:
376  store <8 x float> %v, <8 x float>* %p, align 4
377  ret void
378
379; CHECK-LABEL: test_s_qv8float
380; CHECK: cost of 15 for instruction:   store <8 x float> %v, <8 x float>* %p, align 4
381}
382
; Store cost, target-cpu a2q (attributes #1): <4 x double> %v written through %p
; with align 8 (element-size alignment only). FileCheck expects a reported cost of 7.
383define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
384entry:
385  store <4 x double> %v, <4 x double>* %p, align 8
386  ret void
387
388; CHECK-LABEL: test_s_qv4double
389; CHECK: cost of 7 for instruction:   store <4 x double> %v, <4 x double>* %p, align 8
390}
391
; Store cost, target-cpu a2q (attributes #1): <8 x double> %v written through %p
; with align 8 (element-size alignment only). FileCheck expects a reported cost of 15.
392define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
393entry:
394  store <8 x double> %v, <8 x double>* %p, align 8
395  ret void
396
397; CHECK-LABEL: test_s_qv8double
398; CHECK: cost of 15 for instruction:   store <8 x double> %v, <8 x double>* %p, align 8
399}
400
; Attribute groups that pick the CPU each test function is costed for.
; #0 (pwr7) is attached to the test_l_v* and test_s_v* functions,
; #1 (a2q) to the test_l_qv* and test_s_qv* functions,
; #2 (pwr8) to the test_l_p8v* functions.
401attributes #0 = { nounwind "target-cpu"="pwr7" }
402attributes #1 = { nounwind "target-cpu"="a2q" }
403attributes #2 = { nounwind "target-cpu"="pwr8" }
404
405