; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly (reciprocal followed by a
; multiply) and also test that the scheduler is scheduling the RECIP_IEEE and
; MUL_IEEE instructions in separate instruction groups.
;
; The FUNC prefix is enabled on every RUN line so that each FUNC-LABEL
; directive anchors the -DAG checks that follow it to the output of that one
; function, instead of letting them match anywhere in the file.

; Scalar case: a single reciprocal of %b and a single multiply by %a.
; NOTE(review): the KC0 slots below assume %a and %b are laid out directly
; after the %out pointer (KC0[2].Y), consistent with the slots checked for
; fdiv_v2f32 -- confirm against llc output.
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
  %0 = fdiv float %a, %b
  store float %0, float addrspace(1)* %out
  ret void
}

; Two-element vector case: one reciprocal/multiply pair per lane.
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %0 = fdiv <2 x float> %a, %b
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Four-element vector case with both operands loaded from memory, so the
; R600 checks match temporary registers rather than constant-buffer slots.
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  ; %a is the vector at %in; %b is the vector immediately after it.
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1) * %in
  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
  %result = fdiv <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}