; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s

; Scalar case: one double is loaded through each of %in1 and %in2, the two
; values are multiplied, and the product is stored through %out.
; The checks expect one v_mul_f64 whose three operands are all written as
; v[N:M] register ranges.
; FUNC-LABEL: {{^}}fmul_f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_f64(double addrspace(1)* %out,
                                    double addrspace(1)* %in1,
                                    double addrspace(1)* %in2) {
  %lhs = load double, double addrspace(1)* %in1
  %rhs = load double, double addrspace(1)* %in2
  %prod = fmul double %lhs, %rhs
  store double %prod, double addrspace(1)* %out
  ret void
}

; Two-element vector case: a <2 x double> is loaded through each of %in1 and
; %in2, the vectors are multiplied with a single IR fmul, and the result is
; stored through %out.
; The checks expect two v_mul_f64 instructions in sequence (presumably one per
; vector lane), each with v[N:M] register-range operands.
; FUNC-LABEL: {{^}}fmul_v2f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out,
                                      <2 x double> addrspace(1)* %in1,
                                      <2 x double> addrspace(1)* %in2) {
  %lhs = load <2 x double>, <2 x double> addrspace(1)* %in1
  %rhs = load <2 x double>, <2 x double> addrspace(1)* %in2
  %prod = fmul <2 x double> %lhs, %rhs
  store <2 x double> %prod, <2 x double> addrspace(1)* %out
  ret void
}

; Four-element vector case: a <4 x double> is loaded through each of %in1 and
; %in2, the vectors are multiplied with a single IR fmul, and the result is
; stored through %out.
; The checks expect four v_mul_f64 instructions in sequence (presumably one per
; vector lane), each with v[N:M] register-range operands.
; FUNC-LABEL: {{^}}fmul_v4f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_v4f64(<4 x double> addrspace(1)* %out,
                                      <4 x double> addrspace(1)* %in1,
                                      <4 x double> addrspace(1)* %in2) {
  %lhs = load <4 x double>, <4 x double> addrspace(1)* %in1
  %rhs = load <4 x double>, <4 x double> addrspace(1)* %in2
  %prod = fmul <4 x double> %lhs, %rhs
  store <4 x double> %prod, <4 x double> addrspace(1)* %out
  ret void
}
