/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x8inc-minmax-sse2-dup.c | 87 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() local 95 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() 99 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8inc-minmax-sse-dup.c | 87 const __m128 va3c0000 = _mm_shuffle_ps(va3, va3, _MM_SHUFFLE(0, 0, 0, 0)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() local 95 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() 99 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup()
|
D | 5x8inc-minmax-sse-dup.c | 97 const __m128 va3c0000 = _mm_shuffle_ps(va3, va3, _MM_SHUFFLE(0, 0, 0, 0)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() local 106 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() 111 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup()
|
D | 5x8inc-minmax-sse2-dup.c | 97 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() local 106 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 111 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x8-minmax-sse2-dup.c | 85 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() local 93 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() 97 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c | 85 const __m128 va3c0000 = _mm_shuffle_ps(va3, va3, _MM_SHUFFLE(0, 0, 0, 0)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() local 93 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() 97 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup()
|
D | 5x8-minmax-sse-dup.c | 95 const __m128 va3c0000 = _mm_shuffle_ps(va3, va3, _MM_SHUFFLE(0, 0, 0, 0)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() local 104 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() 109 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
|
D | 5x8-minmax-sse2-dup.c | 95 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() local 104 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 109 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x8-minmax-sse-dup.c | 107 const __m128 va3c0000 = _mm_shuffle_ps(va3, va3, _MM_SHUFFLE(0, 0, 0, 0)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() local 115 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() 119 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c | 107 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() local 115 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() 119 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup()
|
D | 5x8-minmax-sse2-dup.c | 120 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() local 129 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() 134 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup()
|
D | 5x8-minmax-sse-dup.c | 120 const __m128 va3c0000 = _mm_shuffle_ps(va3, va3, _MM_SHUFFLE(0, 0, 0, 0)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() local 129 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3c0000, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() 134 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3c0000, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup()
|