// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s

// Test new aarch64 intrinsics and types
5
#include <arm_neon.h>
7
// Codegen test for vmla_n_f32(a, b, c) on 2 x f32 vectors with a scalar c.
// Under the default FileCheck prefix the expected output is an fmul by
// element (lane 0) followed by a separate fadd. The FMA-prefixed lines
// describe the fused dup + fmla form and apply only when that prefix is
// enabled on a FileCheck invocation.
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  // CHECK-LABEL: test_vmla_n_f32
  return vmla_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
16
// Codegen test for vmlaq_n_f32(a, b, c) on 4 x f32 vectors with a scalar c.
// Under the default FileCheck prefix the expected output is an fmul by
// element (lane 0) followed by a separate fadd. The FMA-prefixed lines
// describe the fused dup + fmla form and apply only when that prefix is
// enabled on a FileCheck invocation.
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  // CHECK-LABEL: test_vmlaq_n_f32
  return vmlaq_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
25
// Codegen test for vmlaq_n_f64(a, b, c) on 2 x f64 vectors with a scalar c.
// Under the default FileCheck prefix the expected output is an fmul by
// element (lane 0) followed by a separate fadd. The FMA-prefixed lines
// describe the fused dup + fmla form and apply only when that prefix is
// enabled on a FileCheck invocation.
float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vmlaq_n_f64
  return vmlaq_n_f64(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
34
// Codegen test for vmlsq_n_f32(a, b, c) on 4 x f32 vectors with a scalar c.
// Under the default FileCheck prefix the expected output is an fmul by
// element (lane 0) followed by a separate fsub. The FMA-prefixed lines
// describe the fused dup + fmls form and apply only when that prefix is
// enabled on a FileCheck invocation.
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  // CHECK-LABEL: test_vmlsq_n_f32
  return vmlsq_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
43
// Codegen test for vmls_n_f32(a, b, c) on 2 x f32 vectors with a scalar c.
// Under the default FileCheck prefix the expected output is an fmul by
// element (lane 0) followed by a separate fsub. The FMA-prefixed lines
// describe the fused dup + fmls form and apply only when that prefix is
// enabled on a FileCheck invocation.
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  // CHECK-LABEL: test_vmls_n_f32
  return vmls_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
52
// Codegen test for vmlsq_n_f64(a, b, c) on 2 x f64 vectors with a scalar c.
// Under the default FileCheck prefix the expected output is an fmul by
// element (lane 0) followed by a separate fsub. The FMA-prefixed lines
// describe the fused dup + fmls form and apply only when that prefix is
// enabled on a FileCheck invocation.
float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vmlsq_n_f64
  return vmlsq_n_f64(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
61
// Codegen test for vmla_lane_f32(a, b, v, 0): 2 x f32 operands, lane 0 of a
// 2 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fadd; the FMA-prefixed line describes a single
// fmla by element and applies only when that prefix is enabled.
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmla_lane_f32_0
  return vmla_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}
69
// Codegen test for vmlaq_lane_f32(a, b, v, 0): 4 x f32 operands, lane 0 of a
// 2 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fadd; the FMA-prefixed line describes a single
// fmla by element and applies only when that prefix is enabled.
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlaq_lane_f32_0
  return vmlaq_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}
77
// Codegen test for vmla_laneq_f32(a, b, v, 0): 2 x f32 operands, lane 0 of a
// 4 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fadd; the FMA-prefixed line describes a single
// fmla by element and applies only when that prefix is enabled.
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmla_laneq_f32_0
  return vmla_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}
85
// Codegen test for vmlaq_laneq_f32(a, b, v, 0): 4 x f32 operands, lane 0 of
// a 4 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fadd; the FMA-prefixed line describes a single
// fmla by element and applies only when that prefix is enabled.
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlaq_laneq_f32_0
  return vmlaq_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}
93
// Codegen test for vmls_lane_f32(a, b, v, 0): 2 x f32 operands, lane 0 of a
// 2 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fsub; the FMA-prefixed line describes a single
// fmls by element and applies only when that prefix is enabled.
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmls_lane_f32_0
  return vmls_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}
101
// Codegen test for vmlsq_lane_f32(a, b, v, 0): 4 x f32 operands, lane 0 of a
// 2 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fsub; the FMA-prefixed line describes a single
// fmls by element and applies only when that prefix is enabled.
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlsq_lane_f32_0
  return vmlsq_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}
109
// Codegen test for vmls_laneq_f32(a, b, v, 0): 2 x f32 operands, lane 0 of a
// 4 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fsub; the FMA-prefixed line describes a single
// fmls by element and applies only when that prefix is enabled.
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmls_laneq_f32_0
  return vmls_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}
117
// Codegen test for vmlsq_laneq_f32(a, b, v, 0): 4 x f32 operands, lane 0 of
// a 4 x f32 vector v. The default-prefix directives expect an fmul by element
// s[0] followed by a separate fsub; the FMA-prefixed line describes a single
// fmls by element and applies only when that prefix is enabled.
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlsq_laneq_f32_0
  return vmlsq_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}
125
// Codegen test for vmla_lane_f32(a, b, v, 1): 2 x f32 operands, lane 1 (the
// highest lane) of a 2 x f32 vector v. The default-prefix directives expect
// an fmul by element s[1] followed by a separate fadd; the FMA-prefixed line
// describes a single fmla by element and applies only when that prefix is
// enabled.
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmla_lane_f32
  return vmla_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
}
133
// Codegen test for vmlaq_lane_f32(a, b, v, 1): 4 x f32 operands, lane 1 (the
// highest lane) of a 2 x f32 vector v. The default-prefix directives expect
// an fmul by element s[1] followed by a separate fadd; the FMA-prefixed line
// describes a single fmla by element and applies only when that prefix is
// enabled.
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlaq_lane_f32
  return vmlaq_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
}
141
// Codegen test for vmla_laneq_f32(a, b, v, 3): 2 x f32 operands, lane 3 (the
// highest lane) of a 4 x f32 vector v. The default-prefix directives expect
// an fmul by element s[3] followed by a separate fadd; the FMA-prefixed line
// describes a single fmla by element and applies only when that prefix is
// enabled.
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmla_laneq_f32
  return vmla_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
}
149
// Codegen test for vmlaq_laneq_f32(a, b, v, 3): 4 x f32 operands, lane 3
// (the highest lane) of a 4 x f32 vector v. The default-prefix directives
// expect an fmul by element s[3] followed by a separate fadd; the
// FMA-prefixed line describes a single fmla by element and applies only when
// that prefix is enabled.
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlaq_laneq_f32
  return vmlaq_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
}
157
// Codegen test for vmls_lane_f32(a, b, v, 1): 2 x f32 operands, lane 1 (the
// highest lane) of a 2 x f32 vector v. The default-prefix directives expect
// an fmul by element s[1] followed by a separate fsub; the FMA-prefixed line
// describes a single fmls by element and applies only when that prefix is
// enabled.
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmls_lane_f32
  return vmls_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
}
165
// Codegen test for vmlsq_lane_f32(a, b, v, 1): 4 x f32 operands, lane 1 (the
// highest lane) of a 2 x f32 vector v. The default-prefix directives expect
// an fmul by element s[1] followed by a separate fsub; the FMA-prefixed line
// describes a single fmls by element and applies only when that prefix is
// enabled.
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlsq_lane_f32
  return vmlsq_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
}
// Codegen test for vmls_laneq_f32(a, b, v, 3): 2 x f32 operands, lane 3 (the
// highest lane) of a 4 x f32 vector v. The default-prefix directives expect
// an fmul by element s[3] followed by a separate fsub; the FMA-prefixed line
// describes a single fmls by element and applies only when that prefix is
// enabled.
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmls_laneq_f32
  return vmls_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
}
180
// Codegen test for vmlsq_laneq_f32(a, b, v, 3): 4 x f32 operands, lane 3
// (the highest lane) of a 4 x f32 vector v. The default-prefix directives
// expect an fmul by element s[3] followed by a separate fsub; the
// FMA-prefixed line describes a single fmls by element and applies only when
// that prefix is enabled.
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlsq_laneq_f32
  return vmlsq_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
}
188
// Codegen test for vfmaq_n_f64(a, b, c) on 2 x f64 vectors with a scalar c.
// A single fused fmla is expected; the pattern for its last operand accepts
// either a full .2d vector register or the by-element form d[0].
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vfmaq_n_f64:
  return vfmaq_n_f64(a, b, c);
  // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}}
}
194
// Codegen test for vfmsq_n_f64(a, b, c) on 2 x f64 vectors with a scalar c.
// A single fused fmls is expected; the pattern for its last operand accepts
// either a full .2d vector register or the by-element form d[0].
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vfmsq_n_f64:
  return vfmsq_n_f64(a, b, c);
  // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}}
}
200