; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s

; Packed f16 max of a loaded <2 x half> against the constant <0.0, 1.0>
; (element 0 is 0xH0000, element 1 is 0xH3C00). Both targets are expected to
; take the constant as the immediate operand 1.0 combined with op_sel and
; op_sel_hi modifiers.
; NOTE(review): presumably the modifiers swap the two halves of the immediate
; so that 1.0 lands in the high element -- confirm against the ISA docs.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH3C00>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the constant <1.0, 0.0>
; (element 0 is 0xH3C00, element 1 is 0xH0000). Both targets are expected to
; take the constant as the plain immediate operand 1.0 with no op_sel
; modifiers printed.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the splat constant <1.0, 1.0>
; (both elements 0xH3C00). Both targets are expected to take the constant as
; the immediate operand 1.0 with only an op_sel_hi modifier printed.
; NOTE(review): presumably op_sel_hi:[1,0] makes the high result element read
; the low half of the immediate as well -- confirm against the ISA docs.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH3C00>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the constant <0.0, -1.0>
; (element 0 is 0xH0000, element 1 is 0xHBC00). Both targets are expected to
; take the constant as the immediate operand -1.0 combined with op_sel and
; op_sel_hi modifiers.
; NOTE(review): presumably the modifiers swap the two halves of the immediate
; so that -1.0 lands in the high element -- confirm against the ISA docs.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xHBC00>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the constant <-1.0, 0.0>
; (element 0 is 0xHBC00, element 1 is 0xH0000). Both targets are expected to
; take the constant as the plain immediate operand -1.0 with no op_sel
; modifiers printed.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xH0000>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the splat constant
; <-1.0, -1.0> (both elements 0xHBC00). Both targets are expected to take the
; constant as the immediate operand -1.0 with only an op_sel_hi modifier
; printed.
; NOTE(review): presumably op_sel_hi:[1,0] makes the high result element read
; the low half of the immediate as well -- confirm against the ISA docs.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xHBC00>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the all-zero constant
; <0.0, 0.0> (both elements 0xH0000). Both targets are expected to take the
; constant as the plain immediate operand 0 with no op_sel modifiers printed.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check line above is about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH0000>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the constant
; <0xH0000, 0xH41C8>, whose non-zero half sits in element 1.
; Expected code differs per target:
;  * gfx900 moves the packed 32-bit value 0x41c80000 into an SGPR with
;    s_mov_b32 and passes that SGPR as the last operand;
;  * gfx1010 uses the 16-bit literal 0x41c8 directly as the first source
;    operand, with op_sel and op_sel_hi modifiers.
; NOTE(review): presumably those modifiers place the literal in the high
; element -- confirm against the ISA docs.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
; GFX9:  s_mov_b32 [[C:s[0-9]+]], 0x41c80000
; GFX9:  v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check lines above are about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the constant
; <0xH41C8, 0xH0000>, whose non-zero half sits in element 0.
; Expected code differs per target:
;  * gfx900 moves 0x41c8 into an SGPR with s_movk_i32 and passes that SGPR as
;    the last operand;
;  * gfx1010 uses the literal 0x41c8 directly as the first source operand,
;    with no op_sel modifiers printed.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0:
; GFX9:  s_movk_i32 [[C:s[0-9]+]], 0x41c8
; GFX9:  v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check lines above are about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0000>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Packed f16 max of a loaded <2 x half> against the constant
; <0xH42CA, 0xH41C8>, where both halves are non-zero and differ.
; Expected code differs per target:
;  * gfx900 moves the packed 32-bit value 0x41c842ca into an SGPR with
;    s_mov_b32 and passes that SGPR as the last operand;
;  * gfx1010 uses the 32-bit literal 0x41c842ca directly as the first source
;    operand, with no op_sel modifiers printed.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8:
; GFX9:  s_mov_b32 [[C:s[0-9]+]], 0x41c842ca
; GFX9:  v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  ; Address the element of %arg selected by this workitem's x id.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  ; The constant second operand is what the check lines above are about.
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>)
  ; Store the result back to the same element that was loaded.
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Declarations of the two intrinsics called by the kernels in this file.
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
declare i32 @llvm.amdgcn.workitem.id.x()