1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5declare float @llvm.ceil.f32(float) nounwind readnone
6declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
7declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
8declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
9declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
10declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
11
; Scalar case: take the ceiling of one float kernel argument and store it
; through %out. The GCN runs expect a single v_ceil_f32_e32; the Evergreen
; run expects the CEIL to write the same register that the raw store reads.
; FUNC-LABEL: {{^}}fceil_f32:
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_f32(float addrspace(1)* %out, float %x) {
  %ceil = call float @llvm.ceil.f32(float %x) nounwind readnone
  store float %ceil, float addrspace(1)* %out
  ret void
}
21
; Two-element vector: one ceil instruction is expected per lane. On
; Evergreen both CEILs must target the register named by the single store
; (the channel suffix is matched separately from the captured register).
; FUNC-LABEL: {{^}}fceil_v2f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
  %ceil = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
  store <2 x float> %ceil, <2 x float> addrspace(1)* %out
  ret void
}
33
; Three-element vector. The GCN expectations below are written with a
; FIXME- prefix, which none of the RUN lines enable, so they are currently
; not checked. The Evergreen run expects two stores, with one CEIL feeding
; the first stored register and two feeding the second; the -DAG form lets
; those three CEILs appear in any order.
; FUNC-LABEL: {{^}}fceil_v3f32:
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
define amdgpu_kernel void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
  %ceil = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
  store <3 x float> %ceil, <3 x float> addrspace(1)* %out
  ret void
}
49
; Four-element vector: four ceil instructions are expected on GCN. On
; Evergreen a single store is expected, with all four CEILs writing the
; register that the store names.
; FUNC-LABEL: {{^}}fceil_v4f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
  %ceil = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
  store <4 x float> %ceil, <4 x float> addrspace(1)* %out
  ret void
}
65
; Eight-element vector: eight ceil instructions are expected on GCN. The
; Evergreen run expects two stores and four CEILs into each stored
; register; the -DAG form lets the CEILs appear in any order.
; FUNC-LABEL: {{^}}fceil_v8f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
define amdgpu_kernel void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
  %ceil = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
  store <8 x float> %ceil, <8 x float> addrspace(1)* %out
  ret void
}
90
; Sixteen-element vector: sixteen ceil instructions are expected on GCN.
; The Evergreen run expects four stores and four CEILs into each stored
; register; the -DAG form lets the CEILs appear in any order.
; FUNC-LABEL: {{^}}fceil_v16f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
define amdgpu_kernel void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
  %ceil = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
  store <16 x float> %ceil, <16 x float> addrspace(1)* %out
  ret void
}
133