; Cost-model regression test: checks the X86 TTI cost estimates for vector
; integer add/xor/mul, float fmul, and vector shifts on three targets
; (corei7-avx -> default CHECK prefix, core2 -> SSE3, core-avx2 -> AVX2).
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

define i32 @add(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} add
  %A = add <4 x i32> undef, undef
  ;CHECK: cost of 4 {{.*}} add
  %B = add <8 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} add
  %C = add <2 x i64> undef, undef
  ;CHECK: cost of 4 {{.*}} add
  %D = add <4 x i64> undef, undef
  ;CHECK: cost of 8 {{.*}} add
  %E = add <8 x i64> undef, undef
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}


define i32 @xor(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} xor
  %A = xor <4 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %B = xor <8 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %C = xor <2 x i64> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %D = xor <4 x i64> undef, undef
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; CHECK: mul
define void @mul() {
  ; A <2 x i32> gets expanded to a <2 x i64> vector.
  ; A <2 x i64> vector multiply is implemented using
  ; 3 PMULUDQ and 2 PADDS and 4 shifts.
  ;CHECK: cost of 9 {{.*}} mul
  %A0 = mul <2 x i32> undef, undef
  ;CHECK: cost of 9 {{.*}} mul
  %A1 = mul <2 x i64> undef, undef
  ;CHECK: cost of 18 {{.*}} mul
  %A2 = mul <4 x i64> undef, undef
  ret void
}

; SSE3: sse3mull
define void @sse3mull() {
  ; SSE3: cost of 6 {{.*}} mul
  %A0 = mul <4 x i32> undef, undef
  ret void
  ; SSE3: avx2mull
}

; AVX2: avx2mull
define void @avx2mull() {
  ; AVX2: cost of 9 {{.*}} mul
  %A0 = mul <4 x i64> undef, undef
  ret void
  ; AVX2: fmul
}

; CHECK: fmul
define i32 @fmul(i32 %arg) {
  ;CHECK: cost of 2 {{.*}} fmul
  %A = fmul <4 x float> undef, undef
  ;CHECK: cost of 2 {{.*}} fmul
  %B = fmul <8 x float> undef, undef
  ret i32 undef
}

; NOTE(review): none of the RUN lines above passes --check-prefix=AVX, so the
; "AVX:" directives in @shift and @avx2shift are not verified by any run —
; confirm whether a corei7-avx RUN line with --check-prefix=AVX is missing.
; AVX: shift
; AVX2: shift
define void @shift() {
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A0 = shl <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A1 = shl <2 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B0 = lshr <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B1 = lshr <2 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %C0 = ashr <4 x i32> undef, undef
  ; AVX: cost of 6 {{.*}} ashr
  ; AVX2: cost of 20 {{.*}} ashr
  %C1 = ashr <2 x i64> undef, undef

  ret void
}

; AVX: avx2shift
; AVX2: avx2shift
define void @avx2shift() {
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A0 = shl <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A1 = shl <4 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B0 = lshr <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B1 = lshr <4 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr

  %C0 = ashr <8 x i32> undef, undef
  ; AVX: cost of 12 {{.*}} ashr
  ; AVX2: cost of 40 {{.*}} ashr
  %C1 = ashr <4 x i64> undef, undef

  ret void
}