; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s

; Every kernel in this file loads consecutive doubles from %in (indexed by the
; value returned from @llvm.r600.read.tidig.x), combines them with an fmul
; followed by fadd/fsub, and stores the result(s) to %out. The checks expect
; the multiply and the add/subtract to be emitted as v_fma_f64, with any
; negation showing up as a '-' prefix on the corresponding operand.
;
; Each test captures every register it later refers to, so no check depends
; on a pattern variable left over from an earlier function.

declare i32 @llvm.r600.read.tidig.x() #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.fma.f64(double, double, double) #0
declare float @llvm.fma.f32(float, float, float) #0

; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fadd double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fadd (fmul x, y), z) -> (fma x, y, z)
; The fmul has two fadd users; both are expected to become FMAs.
; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fadd double %mul, %c
  %fma1 = fadd double %mul, %d
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fadd x, (fmul y, z)) -> (fma y, z, x)
; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fadd double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fsub double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; The fmul has two fsub users; both are expected to become FMAs.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fsub double %mul, %c
  %fma1 = fsub double %mul, %d
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fsub double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; The fmul has two fsub users; both are expected to become FMAs.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fsub double %c, %mul
  %fma1 = fsub double %d, %mul
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma = fsub double %mul.neg, %c

  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; The negated product (%mul.neg) has two fsub users; both are expected to
; become FMAs. The fourth input is captured here as D so that the second FMA
; check does not rely on a capture made while checking a previous function.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul.neg, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; Here the product is used twice: once through %mul.neg (%fma0) and once
; directly (%fma1), so the second FMA is expected to take A without negation.
; The fourth input is captured here as D so that the second FMA check does not
; rely on a capture made while checking a previous function.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load double, double addrspace(1)* %gep.0
  %y = load double, double addrspace(1)* %gep.1
  %z = load double, double addrspace(1)* %gep.2
  %u = load double, double addrspace(1)* %gep.3
  %v = load double, double addrspace(1)* %gep.4

  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
  %tmp2 = fsub double %tmp1, %z

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

; fold (fsub x, (fma y, z, (fmul u, v)))
;   -> (fma (fneg y), z, (fma (fneg u), v, x))

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load double, double addrspace(1)* %gep.0
  %y = load double, double addrspace(1)* %gep.1
  %z = load double, double addrspace(1)* %gep.2
  %u = load double, double addrspace(1)* %gep.3
  %v = load double, double addrspace(1)* %gep.4

  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
  %tmp2 = fsub double %x, %tmp1

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }