; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck %s

; Cost-model expectations for integer add and sub on amdgcn.
;
; Both invocations above are verified against the same default FileCheck
; prefix, so every expected cost below has to hold with half-rate-64-ops
; enabled and with it disabled.
;
; Every kernel has the same shape: load one operand from %vaddr, combine it
; with the argument %b, and store the result to %out. Only the cost reported
; for the add/sub instruction itself is checked. The function name is matched
; with a label directive so that a cost line can only be satisfied by output
; belonging to that function.

; ---- add, 32-bit elements: expected cost equals the element count ----------

; Scalar i32 add, expected cost 1.
; CHECK-LABEL: 'add_i32'
; CHECK: estimated cost of 1 for {{.*}} add i32
define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %add = add i32 %vec, %b
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; <2 x i32> add, expected cost 2.
; CHECK-LABEL: 'add_v2i32'
; CHECK: estimated cost of 2 for {{.*}} add <2 x i32>
define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
  %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
  %add = add <2 x i32> %vec, %b
  store <2 x i32> %add, <2 x i32> addrspace(1)* %out
  ret void
}

; <3 x i32> add, expected cost 3.
; CHECK-LABEL: 'add_v3i32'
; CHECK: estimated cost of 3 for {{.*}} add <3 x i32>
define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
  %add = add <3 x i32> %vec, %b
  store <3 x i32> %add, <3 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> add, expected cost 4.
; CHECK-LABEL: 'add_v4i32'
; CHECK: estimated cost of 4 for {{.*}} add <4 x i32>
define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
  %add = add <4 x i32> %vec, %b
  store <4 x i32> %add, <4 x i32> addrspace(1)* %out
  ret void
}

; ---- add, 64-bit elements: expected cost is twice the element count --------

; Scalar i64 add, expected cost 2.
; CHECK-LABEL: 'add_i64'
; CHECK: estimated cost of 2 for {{.*}} add i64
define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %add = add i64 %vec, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; <2 x i64> add, expected cost 4.
; CHECK-LABEL: 'add_v2i64'
; CHECK: estimated cost of 4 for {{.*}} add <2 x i64>
define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
  %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
  %add = add <2 x i64> %vec, %b
  store <2 x i64> %add, <2 x i64> addrspace(1)* %out
  ret void
}

; <3 x i64> add, expected cost 6.
; CHECK-LABEL: 'add_v3i64'
; CHECK: estimated cost of 6 for {{.*}} add <3 x i64>
define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
  %add = add <3 x i64> %vec, %b
  store <3 x i64> %add, <3 x i64> addrspace(1)* %out
  ret void
}

; <4 x i64> add, expected cost 8.
; CHECK-LABEL: 'add_v4i64'
; CHECK: estimated cost of 8 for {{.*}} add <4 x i64>
define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
  %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
  %add = add <4 x i64> %vec, %b
  store <4 x i64> %add, <4 x i64> addrspace(1)* %out
  ret void
}

; <16 x i64> add, expected cost 32.
; CHECK-LABEL: 'add_v16i64'
; CHECK: estimated cost of 32 for {{.*}} add <16 x i64>
define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
  %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
  %add = add <16 x i64> %vec, %b
  store <16 x i64> %add, <16 x i64> addrspace(1)* %out
  ret void
}

; ---- add, 16-bit elements ---------------------------------------------------

; Scalar i16 add, expected cost 1.
; CHECK-LABEL: 'add_i16'
; CHECK: estimated cost of 1 for {{.*}} add i16
define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
  %vec = load i16, i16 addrspace(1)* %vaddr
  %add = add i16 %vec, %b
  store i16 %add, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> add, expected cost 2.
; CHECK-LABEL: 'add_v2i16'
; CHECK: estimated cost of 2 for {{.*}} add <2 x i16>
define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %add = add <2 x i16> %vec, %b
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; ---- sub: expected costs mirror the add cases of the same type -------------

; Scalar i32 sub, expected cost 1.
; CHECK-LABEL: 'sub_i32'
; CHECK: estimated cost of 1 for {{.*}} sub i32
define amdgpu_kernel void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %sub = sub i32 %vec, %b
  store i32 %sub, i32 addrspace(1)* %out
  ret void
}

; Scalar i64 sub, expected cost 2.
; CHECK-LABEL: 'sub_i64'
; CHECK: estimated cost of 2 for {{.*}} sub i64
define amdgpu_kernel void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %sub = sub i64 %vec, %b
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; Scalar i16 sub, expected cost 1.
; CHECK-LABEL: 'sub_i16'
; CHECK: estimated cost of 1 for {{.*}} sub i16
define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
  %vec = load i16, i16 addrspace(1)* %vaddr
  %sub = sub i16 %vec, %b
  store i16 %sub, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> sub, expected cost 2.
; CHECK-LABEL: 'sub_v2i16'
; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16>
define amdgpu_kernel void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %sub = sub <2 x i16> %vec, %b
  store <2 x i16> %sub, <2 x i16> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }