// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

// Codegen test for vmla_n_f32 called with two float32x2_t vectors and a
// float32_t scalar.
// The default-prefix checks expect a by-element fmul on lane 0 followed by a
// separate fadd, both in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectations (dup + fmla) are only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  // CHECK-LABEL: test_vmla_n_f32
  return vmla_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}

// Codegen test for vmlaq_n_f32 called with two float32x4_t vectors and a
// float32_t scalar.
// The default-prefix checks expect a by-element fmul on lane 0 followed by a
// separate fadd, both in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectations (dup + fmla) are only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  // CHECK-LABEL: test_vmlaq_n_f32
  return vmlaq_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}

// Codegen test for vmlaq_n_f64 called with two float64x2_t vectors and a
// float64_t scalar.
// The default-prefix checks expect a by-element fmul on lane 0 followed by a
// separate fadd, both in the .2d arrangement.
// NOTE(review): the CHECK-FMA expectations (dup + fmla) are only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vmlaq_n_f64
  return vmlaq_n_f64(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}

// Codegen test for vmlsq_n_f32 called with two float32x4_t vectors and a
// float32_t scalar.
// The default-prefix checks expect a by-element fmul on lane 0 followed by a
// separate fsub, both in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectations (dup + fmls) are only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  // CHECK-LABEL: test_vmlsq_n_f32
  return vmlsq_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}

// Codegen test for vmls_n_f32 called with two float32x2_t vectors and a
// float32_t scalar.
// The default-prefix checks expect a by-element fmul on lane 0 followed by a
// separate fsub, both in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectations (dup + fmls) are only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  // CHECK-LABEL: test_vmls_n_f32
  return vmls_n_f32(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}

// Codegen test for vmlsq_n_f64 called with two float64x2_t vectors and a
// float64_t scalar.
// The default-prefix checks expect a by-element fmul on lane 0 followed by a
// separate fsub, both in the .2d arrangement.
// NOTE(review): the CHECK-FMA expectations (dup + fmls) are only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vmlsq_n_f64
  return vmlsq_n_f64(a, b, c);
  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}

// Codegen test for vmla_lane_f32 with lane index 0; a, b and v are all
// float32x2_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fadd in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmla_lane_f32_0
  return vmla_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmlaq_lane_f32 with lane index 0; a and b are float32x4_t
// and the lane source v is float32x2_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fadd in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlaq_lane_f32_0
  return vmlaq_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmla_laneq_f32 with lane index 0; a and b are float32x2_t
// and the lane source v is float32x4_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fadd in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmla_laneq_f32_0
  return vmla_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmlaq_laneq_f32 with lane index 0; a, b and v are all
// float32x4_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fadd in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlaq_laneq_f32_0
  return vmlaq_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmls_lane_f32 with lane index 0; a, b and v are all
// float32x2_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fsub in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmls_lane_f32_0
  return vmls_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmlsq_lane_f32 with lane index 0; a and b are float32x4_t
// and the lane source v is float32x2_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fsub in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlsq_lane_f32_0
  return vmlsq_lane_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmls_laneq_f32 with lane index 0; a and b are float32x2_t
// and the lane source v is float32x4_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fsub in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmls_laneq_f32_0
  return vmls_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmlsq_laneq_f32 with lane index 0; a, b and v are all
// float32x4_t.
// The default-prefix checks expect a by-element fmul on s[0] followed by a
// separate fsub in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlsq_laneq_f32_0
  return vmlsq_laneq_f32(a, b, v, 0);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
}

// Codegen test for vmla_lane_f32 with lane index 1; a, b and v are all
// float32x2_t.
// The default-prefix checks expect a by-element fmul on s[1] followed by a
// separate fadd in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmla_lane_f32
  return vmla_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
}

// Codegen test for vmlaq_lane_f32 with lane index 1; a and b are float32x4_t
// and the lane source v is float32x2_t.
// The default-prefix checks expect a by-element fmul on s[1] followed by a
// separate fadd in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlaq_lane_f32
  return vmlaq_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
}

// Codegen test for vmla_laneq_f32 with lane index 3; a and b are float32x2_t
// and the lane source v is float32x4_t.
// The default-prefix checks expect a by-element fmul on s[3] followed by a
// separate fadd in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmla_laneq_f32
  return vmla_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
  // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
}

// Codegen test for vmlaq_laneq_f32 with lane index 3; a, b and v are all
// float32x4_t.
// The default-prefix checks expect a by-element fmul on s[3] followed by a
// separate fadd in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmla) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlaq_laneq_f32
  return vmlaq_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
  // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
}

// Codegen test for vmls_lane_f32 with lane index 1; a, b and v are all
// float32x2_t.
// The default-prefix checks expect a by-element fmul on s[1] followed by a
// separate fsub in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmls_lane_f32
  return vmls_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
}

// Codegen test for vmlsq_lane_f32 with lane index 1; a and b are float32x4_t
// and the lane source v is float32x2_t.
// The default-prefix checks expect a by-element fmul on s[1] followed by a
// separate fsub in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  // CHECK-LABEL: test_vmlsq_lane_f32
  return vmlsq_lane_f32(a, b, v, 1);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
}
// Codegen test for vmls_laneq_f32 with lane index 3; a and b are float32x2_t
// and the lane source v is float32x4_t.
// The default-prefix checks expect a by-element fmul on s[3] followed by a
// separate fsub in the .2s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmls_laneq_f32
  return vmls_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
  // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
}

// Codegen test for vmlsq_laneq_f32 with lane index 3; a, b and v are all
// float32x4_t.
// The default-prefix checks expect a by-element fmul on s[3] followed by a
// separate fsub in the .4s arrangement.
// NOTE(review): the CHECK-FMA expectation (by-element fmls) is only evaluated
// when FileCheck is run with that prefix -- confirm an invocation enables it.
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  // CHECK-LABEL: test_vmlsq_laneq_f32
  return vmlsq_laneq_f32(a, b, v, 3);
  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
  // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
}

// Codegen test for vfmaq_n_f64 called with two float64x2_t vectors and a
// float64_t scalar.
// The check expects an fmla on .2d operands whose final operand is either a
// full .2d vector or the d[0] lane of a vector register.
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vfmaq_n_f64:
  return vfmaq_n_f64(a, b, c);
  // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}}
}

// Codegen test for vfmsq_n_f64 called with two float64x2_t vectors and a
// float64_t scalar.
// The check expects an fmls on .2d operands whose final operand is either a
// full .2d vector or the d[0] lane of a vector register.
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  // CHECK-LABEL: test_vfmsq_n_f64:
  return vfmsq_n_f64(a, b, c);
  // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}}
}