
Searched refs:vb01234567 (Results 1 – 25 of 124) sorted by relevance


/external/XNNPACK/src/qu8-igemm/
4x8-minmax-neon.c
96 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon() local
97 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
110 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon() local
111 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
124 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon() local
125 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
138 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon() local
139 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
152 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon() local
153 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
[all …]
8x8-minmax-neon.c
144 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
145 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
166 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
167 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
188 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
189 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
210 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
211 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
232 const uint8x8_t vb01234567 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local
233 const int16x8_t vxb01234567 = vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
[all …]
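
Note: every qu8-igemm hit above is the same two-instruction idiom: load eight unsigned 8-bit weights, widen-subtract the weight zero point, and reinterpret the unsigned 16-bit lanes as signed. A minimal sketch of that step follows; the helper name is illustrative, not part of XNNPACK:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Widening load matching the hit lines: vld1_u8 + vsubl_u8 + reinterpret.
     * vsubl_u8 widens both u8 operands to u16 and subtracts; the difference
     * lies in [-255, 255], so reinterpreting the u16 bit pattern as s16
     * yields the exact signed value. */
    static inline int16x8_t load_widen_qu8(const void** w, uint8x8_t vb_zero_point) {
      const uint8x8_t vb01234567 = vld1_u8((const uint8_t*) *w);
      *w = (const void*) ((uintptr_t) *w + sizeof(uint8x8_t));
      return vreinterpretq_s16_u16(vsubl_u8(vb01234567, vb_zero_point));
    }
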
/external/XNNPACK/src/f16-vbinary/gen/
vmax-neonfp16arith-x16.c
38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmax_ukernel__neonfp16arith_x16() local
42 float16x8_t vy01234567 = vmaxq_f16(va01234567, vb01234567); in xnn_f16_vmax_ukernel__neonfp16arith_x16()
52 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmax_ukernel__neonfp16arith_x16() local
54 float16x8_t vy01234567 = vmaxq_f16(va01234567, vb01234567); in xnn_f16_vmax_ukernel__neonfp16arith_x16()
59 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmax_ukernel__neonfp16arith_x16() local
61 float16x8_t vy01234567 = vmaxq_f16(va01234567, vb01234567); in xnn_f16_vmax_ukernel__neonfp16arith_x16()
vmin-neonfp16arith-x16.c
38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmin_ukernel__neonfp16arith_x16() local
42 float16x8_t vy01234567 = vminq_f16(va01234567, vb01234567); in xnn_f16_vmin_ukernel__neonfp16arith_x16()
52 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmin_ukernel__neonfp16arith_x16() local
54 float16x8_t vy01234567 = vminq_f16(va01234567, vb01234567); in xnn_f16_vmin_ukernel__neonfp16arith_x16()
59 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmin_ukernel__neonfp16arith_x16() local
61 float16x8_t vy01234567 = vminq_f16(va01234567, vb01234567); in xnn_f16_vmin_ukernel__neonfp16arith_x16()
vsqrdiff-neonfp16arith-x16.c
38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16() local
42 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16()
54 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16() local
56 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16()
62 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16() local
64 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16()
vdiv-minmax-neonfp16arith-x16.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16() local
44 float16x8_t vy01234567 = vdivq_f16(va01234567, vb01234567); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16()
59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16() local
61 float16x8_t vy01234567 = vdivq_f16(va01234567, vb01234567); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16()
68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16() local
70 float16x8_t vy01234567 = vdivq_f16(va01234567, vb01234567); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16()
vsub-minmax-neonfp16arith-x16.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16() local
44 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16()
59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16() local
61 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16()
68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16() local
70 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16()
vmul-minmax-neonfp16arith-x16.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16() local
44 float16x8_t vy01234567 = vmulq_f16(va01234567, vb01234567); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16()
59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16() local
61 float16x8_t vy01234567 = vmulq_f16(va01234567, vb01234567); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16()
68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16() local
70 float16x8_t vy01234567 = vmulq_f16(va01234567, vb01234567); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16()
vadd-minmax-neonfp16arith-x16.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16() local
44 float16x8_t vy01234567 = vaddq_f16(va01234567, vb01234567); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16()
59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16() local
61 float16x8_t vy01234567 = vaddq_f16(va01234567, vb01234567); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16()
68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16() local
70 float16x8_t vy01234567 = vaddq_f16(va01234567, vb01234567); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16()
vmin-neonfp16arith-x8.c
38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmin_ukernel__neonfp16arith_x8() local
40 float16x8_t vy01234567 = vminq_f16(va01234567, vb01234567); in xnn_f16_vmin_ukernel__neonfp16arith_x8()
48 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmin_ukernel__neonfp16arith_x8() local
50 float16x8_t vy01234567 = vminq_f16(va01234567, vb01234567); in xnn_f16_vmin_ukernel__neonfp16arith_x8()
vmax-neonfp16arith-x8.c
38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmax_ukernel__neonfp16arith_x8() local
40 float16x8_t vy01234567 = vmaxq_f16(va01234567, vb01234567); in xnn_f16_vmax_ukernel__neonfp16arith_x8()
48 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmax_ukernel__neonfp16arith_x8() local
50 float16x8_t vy01234567 = vmaxq_f16(va01234567, vb01234567); in xnn_f16_vmax_ukernel__neonfp16arith_x8()
vsqrdiff-neonfp16arith-x8.c
38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x8() local
40 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x8()
49 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x8() local
51 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x8()
vdiv-minmax-neonfp16arith-x8.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8() local
42 float16x8_t vy01234567 = vdivq_f16(va01234567, vb01234567); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8()
53 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8() local
55 float16x8_t vy01234567 = vdivq_f16(va01234567, vb01234567); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8()
vadd-minmax-neonfp16arith-x8.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x8() local
42 float16x8_t vy01234567 = vaddq_f16(va01234567, vb01234567); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x8()
53 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x8() local
55 float16x8_t vy01234567 = vaddq_f16(va01234567, vb01234567); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x8()
vsub-minmax-neonfp16arith-x8.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8() local
42 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8()
53 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8() local
55 float16x8_t vy01234567 = vsubq_f16(va01234567, vb01234567); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8()
vmul-minmax-neonfp16arith-x8.c
40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x8() local
42 float16x8_t vy01234567 = vmulq_f16(va01234567, vb01234567); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x8()
53 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x8() local
55 float16x8_t vy01234567 = vmulq_f16(va01234567, vb01234567); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x8()
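
Note: the f16-vbinary hits differ only in the arithmetic intrinsic (vmaxq_f16, vminq_f16, vsubq_f16, vdivq_f16, vmulq_f16, vaddq_f16); the load/store skeleton is identical. A sketch of that skeleton using vmaxq_f16, assuming a target with the ARMv8.2 fp16 arithmetic extension (e.g. -march=armv8.2-a+fp16); the function name is illustrative, and the real ukernels also handle the sub-8-element tail visible in the later hits of each file:

    #include <arm_neon.h>
    #include <stddef.h>

    /* Elementwise f16 binary op over full 8-lane vectors, mirroring the hit
     * lines: load 8 halves from each input, apply the op, store 8 results. */
    void f16_vmax_x8(size_t n, const float16_t* a, const float16_t* b, float16_t* y) {
      for (; n >= 8; n -= 8) {
        const float16x8_t va01234567 = vld1q_f16(a); a += 8;
        const float16x8_t vb01234567 = vld1q_f16(b); b += 8;
        const float16x8_t vy01234567 = vmaxq_f16(va01234567, vb01234567);
        vst1q_f16(y, vy01234567); y += 8;
      }
    }
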
/external/XNNPACK/src/f32-gemm/gen-inc/
8x8inc-minmax-fma3-broadcast.c
115 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() local
118 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
119 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
120 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
121 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
122 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
123 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
124 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
125 vacc7x01234567 = _mm256_fmadd_ps(va7, vb01234567, vacc7x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
7x8inc-minmax-fma3-broadcast.c
106 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() local
109 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
110 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
111 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
112 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
113 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
114 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
115 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
7x8inc-minmax-avx-broadcast.c
106 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() local
109 vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
110 vacc1x01234567 = _mm256_add_ps(vacc1x01234567, _mm256_mul_ps(va1, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
111 vacc2x01234567 = _mm256_add_ps(vacc2x01234567, _mm256_mul_ps(va2, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
112 vacc3x01234567 = _mm256_add_ps(vacc3x01234567, _mm256_mul_ps(va3, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
113 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
114 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
115 vacc6x01234567 = _mm256_add_ps(vacc6x01234567, _mm256_mul_ps(va6, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
8x8-minmax-fma3-broadcast.c
130 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() local
150 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
151 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
152 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
153 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
154 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
155 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
156 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
157 vacc7x01234567 = _mm256_fmadd_ps(va7, vb01234567, vacc7x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
7x8-minmax-avx-broadcast.c
120 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() local
138 vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
139 vacc1x01234567 = _mm256_add_ps(vacc1x01234567, _mm256_mul_ps(va1, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
140 vacc2x01234567 = _mm256_add_ps(vacc2x01234567, _mm256_mul_ps(va2, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
141 vacc3x01234567 = _mm256_add_ps(vacc3x01234567, _mm256_mul_ps(va3, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
142 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
143 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
144 vacc6x01234567 = _mm256_add_ps(vacc6x01234567, _mm256_mul_ps(va6, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
7x8-minmax-fma3-broadcast.c
120 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() local
138 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
139 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
140 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
141 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
142 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
143 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
144 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
8x8-minmax-fma3-broadcast.c
113 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() local
116 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
117 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
118 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
119 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
120 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
121 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
122 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
123 vacc7x01234567 = _mm256_fmadd_ps(va7, vb01234567, vacc7x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
7x8-minmax-avx-broadcast.c
104 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() local
107 vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
108 vacc1x01234567 = _mm256_add_ps(vacc1x01234567, _mm256_mul_ps(va1, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
109 vacc2x01234567 = _mm256_add_ps(vacc2x01234567, _mm256_mul_ps(va2, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
110 vacc3x01234567 = _mm256_add_ps(vacc3x01234567, _mm256_mul_ps(va3, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
111 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
112 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
113 vacc6x01234567 = _mm256_add_ps(vacc6x01234567, _mm256_mul_ps(va6, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
7x8-minmax-fma3-broadcast.c
104 const __m256 vb01234567 = _mm256_load_ps(w); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() local
107 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
108 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
109 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
110 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
111 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
112 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
113 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
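
Note: across the three f32 directories above (f32-gemm/gen-inc, f32-igemm/gen, f32-gemm/gen), the hits come in matched pairs: the *-fma3-broadcast files update each accumulator row with a single _mm256_fmadd_ps, while the *-avx-broadcast files express the same update as _mm256_mul_ps followed by _mm256_add_ps for targets without FMA; the gen-inc variants appear to differ only upstream of this step, in how the accumulators are initialized. One row of that inner step, sketched under the assumption that a0 points at the current A element and w at a 32-byte-aligned packed weight panel (the helper name is illustrative):

    #include <immintrin.h>

    /* One row of the broadcast GEMM update shown in the hit lines. */
    static inline __m256 gemm_step_broadcast(__m256 vacc0x01234567,
                                             const float* a0, const float* w) {
      const __m256 va0 = _mm256_broadcast_ss(a0);   /* splat one A element   */
      const __m256 vb01234567 = _mm256_load_ps(w);  /* 8 packed B values     */
    #ifdef __FMA__
      /* FMA3 variant: fused multiply-add (the *-fma3-broadcast hits). */
      return _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567);
    #else
      /* Plain AVX variant: separate multiply and add (the *-avx-broadcast hits). */
      return _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567));
    #endif
    }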
