Lines Matching full:fast
4 …UN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefix=SSSE3-F…
6 … RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefix=AVX1-…
32 ; SSSE3-FAST-LABEL: PR37890_v4f32:
33 ; SSSE3-FAST: # %bb.0:
34 ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
35 ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
36 ; SSSE3-FAST-NEXT: retq
46 ; AVX1-FAST-LABEL: PR37890_v4f32:
47 ; AVX1-FAST: # %bb.0:
48 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
49 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
50 ; AVX1-FAST-NEXT: retq
61 %sum0 = fadd fast <2 x float> %lo0, %hi0
63 %sum1 = fadd fast <2 x float> %sum0, %hi1
87 ; SSSE3-FAST-LABEL: PR37890_v4f64:
88 ; SSSE3-FAST: # %bb.0:
89 ; SSSE3-FAST-NEXT: addpd %xmm1, %xmm0
90 ; SSSE3-FAST-NEXT: haddpd %xmm0, %xmm0
91 ; SSSE3-FAST-NEXT: retq
102 ; AVX1-FAST-LABEL: PR37890_v4f64:
103 ; AVX1-FAST: # %bb.0:
104 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
105 ; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm1, %xmm0
106 ; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
107 ; AVX1-FAST-NEXT: vzeroupper
108 ; AVX1-FAST-NEXT: retq
120 %sum0 = fadd fast <2 x double> %lo0, %hi0
122 %sum1 = fadd fast <2 x double> %sum0, %hi1
150 ; SSSE3-FAST-LABEL: PR37890_v8f32:
151 ; SSSE3-FAST: # %bb.0:
152 ; SSSE3-FAST-NEXT: addps %xmm1, %xmm0
153 ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
154 ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
155 ; SSSE3-FAST-NEXT: retq
168 ; AVX1-FAST-LABEL: PR37890_v8f32:
169 ; AVX1-FAST: # %bb.0:
170 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
171 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm1, %xmm0
172 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
173 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
174 ; AVX1-FAST-NEXT: vzeroupper
175 ; AVX1-FAST-NEXT: retq
189 %sum0 = fadd fast <4 x float> %lo0, %hi0
192 %sum1 = fadd fast <2 x float> %lo1, %hi1
194 %sum2 = fadd fast <2 x float> %sum1, %hi2
220 ; SSSE3-FAST-LABEL: PR37890_v8f64:
221 ; SSSE3-FAST: # %bb.0:
222 ; SSSE3-FAST-NEXT: addpd %xmm3, %xmm1
223 ; SSSE3-FAST-NEXT: addpd %xmm2, %xmm1
224 ; SSSE3-FAST-NEXT: addpd %xmm0, %xmm1
225 ; SSSE3-FAST-NEXT: haddpd %xmm1, %xmm1
226 ; SSSE3-FAST-NEXT: movapd %xmm1, %xmm0
227 ; SSSE3-FAST-NEXT: retq
239 ; AVX1-FAST-LABEL: PR37890_v8f64:
240 ; AVX1-FAST: # %bb.0:
241 ; AVX1-FAST-NEXT: vaddpd %ymm1, %ymm0, %ymm0
242 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
243 ; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm1, %xmm0
244 ; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
245 ; AVX1-FAST-NEXT: vzeroupper
246 ; AVX1-FAST-NEXT: retq
259 %sum0 = fadd fast <4 x double> %lo0, %hi0
262 %sum1 = fadd fast <2 x double> %lo1, %hi1
264 %sum2 = fadd fast <2 x double> %sum1, %hi2
295 ; SSSE3-FAST-LABEL: PR37890_v16f32:
296 ; SSSE3-FAST: # %bb.0:
297 ; SSSE3-FAST-NEXT: addps %xmm3, %xmm1
298 ; SSSE3-FAST-NEXT: addps %xmm2, %xmm1
299 ; SSSE3-FAST-NEXT: addps %xmm0, %xmm1
300 ; SSSE3-FAST-NEXT: movaps %xmm1, %xmm0
301 ; SSSE3-FAST-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
302 ; SSSE3-FAST-NEXT: addps %xmm1, %xmm0
303 ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
304 ; SSSE3-FAST-NEXT: retq
318 ; AVX1-FAST-LABEL: PR37890_v16f32:
319 ; AVX1-FAST: # %bb.0:
320 ; AVX1-FAST-NEXT: vaddps %ymm1, %ymm0, %ymm0
321 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
322 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm1, %xmm0
323 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
324 ; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
325 ; AVX1-FAST-NEXT: vzeroupper
326 ; AVX1-FAST-NEXT: retq
341 %sum0 = fadd fast <8 x float> %lo0, %hi0
344 %sum1 = fadd fast <4 x float> %lo1, %hi1
347 %sum2 = fadd fast <2 x float> %lo2, %hi2
349 %sum3 = fadd fast <2 x float> %sum2, %hi3