; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s

; <4 x half> addition. Both operands are expected to be widened to .4s with
; fcvtl (in either order), added in single precision, and the sum narrowed
; back into v0.4h with fcvtn.
define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: add_h:
; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
; CHECK: fadd [[RES:v[0-9]+\.4s]], [[OP1]], [[OP2]]
; CHECK: fcvtn v0.4h, [[RES]]
  %0 = fadd <4 x half> %a, %b
  ret <4 x half> %0
}


; Splat of the half constant 0xH3CCD. The 16-bit pattern is expected to be
; materialised in a w register with movz and broadcast to all four lanes of
; v0 with dup. The %a parameter is not used by the body.
define <4 x half> @build_h4(<4 x half> %a) {
entry:
; CHECK-LABEL: build_h4:
; CHECK: movz [[GPR:w[0-9]+]], #0x3ccd
; CHECK: dup v0.4h, [[GPR]]
  ret <4 x half> <half 0xH3CCD, half 0xH3CCD, half 0xH3CCD, half 0xH3CCD>
}


; <4 x half> subtraction. Both operands are expected to be widened to .4s
; with fcvtl (in either order), subtracted in single precision, and the
; difference narrowed back into v0.4h with fcvtn.
define <4 x half> @sub_h(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: sub_h:
; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
; CHECK: fsub [[RES:v[0-9]+\.4s]], [[OP1]], [[OP2]]
; CHECK: fcvtn v0.4h, [[RES]]
  %0 = fsub <4 x half> %a, %b
  ret <4 x half> %0
}


; <4 x half> multiplication. Both operands are expected to be widened to .4s
; with fcvtl (in either order), multiplied in single precision, and the
; product narrowed back into v0.4h with fcvtn.
define <4 x half> @mul_h(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: mul_h:
; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
; CHECK: fmul [[RES:v[0-9]+\.4s]], [[OP1]], [[OP2]]
; CHECK: fcvtn v0.4h, [[RES]]
  %0 = fmul <4 x half> %a, %b
  ret <4 x half> %0
}


; <4 x half> division. Both operands are expected to be widened to .4s with
; fcvtl (in either order), divided in single precision, and the quotient
; narrowed back into v0.4h with fcvtn.
define <4 x half> @div_h(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: div_h:
; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
; CHECK: fdiv [[RES:v[0-9]+\.4s]], [[OP1]], [[OP2]]
; CHECK: fcvtn v0.4h, [[RES]]
  %0 = fdiv <4 x half> %a, %b
  ret <4 x half> %0
}


; Load of a <4 x half> through the pointer argument. The whole vector is
; expected to be fetched with one 64-bit ldr into d0 from the address in x0.
define <4 x half> @load_h(<4 x half>* %a) {
entry:
; CHECK-LABEL: load_h:
; CHECK: ldr d0, [x0]
  %0 = load <4 x half>, <4 x half>* %a, align 4
  ret <4 x half> %0
}


; Store of a <4 x half> through the pointer argument. The whole vector is
; expected to be written with one 64-bit str of d0 to the address in x0.
define void @store_h(<4 x half>* %a, <4 x half> %b) {
entry:
; CHECK-LABEL: store_h:
; CHECK: str d0, [x0]
  store <4 x half> %b, <4 x half>* %a, align 4
  ret void
}

; Truncation <4 x float> -> <4 x half>. Expected to be a single in-place
; fcvtn from v0.4s to v0.4h.
define <4 x half> @s_to_h(<4 x float> %a) {
; CHECK-LABEL: s_to_h:
; CHECK: fcvtn v0.4h, v0.4s
  %1 = fptrunc <4 x float> %a to <4 x half>
  ret <4 x half> %1
}

; Truncation <4 x double> -> <4 x half>. Only checked loosely: the output
; must contain four instructions matching "fcvt" and four matching "ins",
; in any relative order. Operands and registers are not constrained.
define <4 x half> @d_to_h(<4 x double> %a) {
; CHECK-LABEL: d_to_h:
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: ins
; CHECK-DAG: ins
; CHECK-DAG: ins
; CHECK-DAG: ins
  %1 = fptrunc <4 x double> %a to <4 x half>
  ret <4 x half> %1
}

; Extension <4 x half> -> <4 x float>. Expected to be a single in-place
; fcvtl from v0.4h to v0.4s.
define <4 x float> @h_to_s(<4 x half> %a) {
; CHECK-LABEL: h_to_s:
; CHECK: fcvtl v0.4s, v0.4h
  %1 = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %1
}

; Extension <4 x half> -> <4 x double>. Only checked loosely: the output
; must contain four instructions matching "fcvt" and four matching "ins",
; in any relative order. Operands and registers are not constrained.
define <4 x double> @h_to_d(<4 x half> %a) {
; CHECK-LABEL: h_to_d:
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: ins
; CHECK-DAG: ins
; CHECK-DAG: ins
; CHECK-DAG: ins
  %1 = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %1
}

; Bitcast <4 x i16> -> <4 x half>. An unnamed float parameter precedes %a,
; and the only code expected is a full-width vector register move from v1
; to v0; no conversion instruction is checked for.
define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) {
; CHECK-LABEL: bitcast_i_to_h:
; CHECK: mov v0.16b, v1.16b
  %2 = bitcast <4 x i16> %a to <4 x half>
  ret <4 x half> %2
}

; Bitcast <4 x half> -> <4 x i16>. An unnamed float parameter precedes %a,
; and the only code expected is a full-width vector register move from v1
; to v0; no conversion instruction is checked for.
define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
; CHECK-LABEL: bitcast_h_to_i:
; CHECK: mov v0.16b, v1.16b
  %2 = bitcast <4 x half> %a to <4 x i16>
  ret <4 x i16> %2
}


; Signed <4 x i8> -> <4 x half>. The exact instruction sequence is pinned:
; shl/sshr by 8 on the .4h lanes (sign-extending the low byte of each lane),
; sshll to .4s, scvtf in single precision, fcvtn back to v0.4h, then ret.
define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
; CHECK-LABEL: sitofp_i8:
; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
; CHECK-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8
; CHECK-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0
; CHECK-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]]
; CHECK-NEXT: fcvtn v0.4h, [[OP4]]
; CHECK-NEXT: ret
  %1 = sitofp <4 x i8> %a to <4 x half>
  ret <4 x half> %1
}


; Signed <4 x i16> -> <4 x half>. The exact instruction sequence is pinned:
; sshll widens v0.4h to .4s, scvtf converts in single precision, fcvtn
; narrows into v0.4h, then ret.
define <4 x half> @sitofp_i16(<4 x i16> %a) #0 {
; CHECK-LABEL: sitofp_i16:
; CHECK-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0
; CHECK-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
; CHECK-NEXT: ret
  %1 = sitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %1
}


; Signed <4 x i32> -> <4 x half>. The exact instruction sequence is pinned:
; scvtf converts v0.4s in single precision, fcvtn narrows into v0.4h, then
; ret. The ret check mirrors the i8/i16 variants so no extra instruction can
; slip in after the narrowing.
define <4 x half> @sitofp_i32(<4 x i32> %a) #0 {
; CHECK-LABEL: sitofp_i32:
; CHECK-NEXT: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
; CHECK-NEXT: fcvtn v0.4h, [[OP1]]
; CHECK-NEXT: ret
  %1 = sitofp <4 x i32> %a to <4 x half>
  ret <4 x half> %1
}


; Signed <4 x i64> -> <4 x half>. The input arrives as two .2d registers
; (v0, v1). Each half is converted with scvtf, the two results are packed
; into one .4s register with fcvtn/fcvtn2, and a final fcvtn narrows into
; v0.4h. The first three checks may match in any order; the packing tail is
; pinned line by line, ending with ret like the i8/i16 variants.
define <4 x half> @sitofp_i64(<4 x i64> %a) #0 {
; CHECK-LABEL: sitofp_i64:
; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]]
; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s
; CHECK-NEXT: ret
  %1 = sitofp <4 x i64> %a to <4 x half>
  ret <4 x half> %1
}

; Unsigned <4 x i8> -> <4 x half>. The exact instruction sequence is pinned:
; bic clears the upper byte of every .4h lane of v0 (zero-extending the low
; byte), ushll widens to .4s, ucvtf converts in single precision, fcvtn
; narrows into v0.4h, then ret.
define <4 x half> @uitofp_i8(<4 x i8> %a) #0 {
; CHECK-LABEL: uitofp_i8:
; CHECK-NEXT: bic v0.4h, #0xff, lsl #8
; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
; CHECK-NEXT: ret
  %1 = uitofp <4 x i8> %a to <4 x half>
  ret <4 x half> %1
}


; Unsigned <4 x i16> -> <4 x half>. The exact instruction sequence is
; pinned: ushll widens v0.4h to .4s, ucvtf converts in single precision,
; fcvtn narrows into v0.4h, then ret.
define <4 x half> @uitofp_i16(<4 x i16> %a) #0 {
; CHECK-LABEL: uitofp_i16:
; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
; CHECK-NEXT: ret
  %1 = uitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %1
}


; Unsigned <4 x i32> -> <4 x half>. The exact instruction sequence is
; pinned: ucvtf converts v0.4s in single precision, fcvtn narrows into
; v0.4h, then ret. The ret check mirrors the i8/i16 variants so no extra
; instruction can slip in after the narrowing.
define <4 x half> @uitofp_i32(<4 x i32> %a) #0 {
; CHECK-LABEL: uitofp_i32:
; CHECK-NEXT: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
; CHECK-NEXT: fcvtn v0.4h, [[OP1]]
; CHECK-NEXT: ret
  %1 = uitofp <4 x i32> %a to <4 x half>
  ret <4 x half> %1
}


; Unsigned <4 x i64> -> <4 x half>. The input arrives as two .2d registers
; (v0, v1). Each half is converted with ucvtf, the two results are packed
; into one .4s register with fcvtn/fcvtn2, and a final fcvtn narrows into
; v0.4h. The first three checks may match in any order; the packing tail is
; pinned line by line, ending with ret like the i8/i16 variants.
define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
; CHECK-LABEL: uitofp_i64:
; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]]
; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s
; CHECK-NEXT: ret
  %1 = uitofp <4 x i64> %a to <4 x half>
  ret <4 x half> %1
}

; Attribute group referenced by the sitofp_* and uitofp_* functions.
attributes #0 = { nounwind }