/external/XNNPACK/src/f32-gemm/gen-inc/
D | 3x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup():
    103  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    110  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    113  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 3x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup():
    103  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    110  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    113  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 4x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup():
    119  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    127  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    131  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 4x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup():
    119  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    127  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    131  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 5x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup():
    135  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    144  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    149  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 5x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup():
    135  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    144  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    149  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

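Every hit in this listing is the same pattern: the *-dup microkernels broadcast lane 2 of the row-1 input vector va1 into all four lanes (va1c2222), then multiply-accumulate that broadcast against the k = 2 slice of the packed weights for output columns 0-3 and 4-7. The sse-dup files use a float shuffle; the sse2-dup files produce the identical result through the integer shuffle, presumably so the broadcast can issue as a single-source pshufd. A minimal sketch of the two idioms (the helper names here are ours, not XNNPACK's):

    #include <xmmintrin.h>   // SSE: _mm_shuffle_ps
    #include <emmintrin.h>   // SSE2: _mm_shuffle_epi32 and the bitcast intrinsics

    // SSE idiom: broadcast lane 2 of va1 with a float shuffle (shufps).
    static inline __m128 dup_lane2_sse(__m128 va1) {
      return _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));
    }

    // SSE2 idiom: same result via the integer shuffle (pshufd). The two
    // casts are bitcasts and emit no instructions; only the shuffle does.
    static inline __m128 dup_lane2_sse2(__m128 va1) {
      return _mm_castsi128_ps(
          _mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));
    }
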
/external/XNNPACK/src/f32-gemm/gen/
D | 3x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup():
    101  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    108  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    111  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 3x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup():
    101  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    108  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    111  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 4x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup():
    117  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    125  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    129  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 4x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup():
    117  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    125  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    129  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 5x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup():
    133  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    142  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    147  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 5x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup():
    133  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    142  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    147  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

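The two uses that follow each definition are one k-step of the unrolled inner loop: the broadcast value multiplies two 4-wide columns of the packed B panel, and the products are added into row 1's accumulators. A self-contained sketch of that step, assuming a packed-B layout in which the k = 2 slice sits at offset 16; the function name and offsets are illustrative, not lifted from the kernels:

    #include <xmmintrin.h>

    // One k-step (k = 2) of the unrolled inner loop for output row 1,
    // reconstructed as a standalone function under an assumed B layout
    // (8 weights per k-step, c2 slice third).
    void k2_step_row1(const float a1[4], const float w[32],
                      __m128* vacc1x0123, __m128* vacc1x4567) {
      const __m128 va1 = _mm_loadu_ps(a1);            // a1[0..3]: four k-values of A row 1
      const __m128 vb0123c2 = _mm_loadu_ps(w + 16);   // B weights, k = 2, output cols 0-3
      const __m128 vb4567c2 = _mm_loadu_ps(w + 20);   // B weights, k = 2, output cols 4-7

      // Broadcast lane 2, then multiply-accumulate into both column halves.
      const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));
      *vacc1x0123 = _mm_add_ps(*vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
      *vacc1x4567 = _mm_add_ps(*vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));
    }
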
/external/XNNPACK/src/f32-igemm/gen/
D | 3x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup():
    120  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    127  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    130  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 3x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup():
    120  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    127  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    130  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 4x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup():
    139  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    147  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    151  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 4x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup():
    139  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    147  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    151  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 5x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup():
    158  const __m128 va1c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(2, 2, 2, 2)));  (local definition)
    167  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    172  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

D | 5x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup():
    158  const __m128 va1c2222 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(2, 2, 2, 2));  (local definition)
    167  vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2));
    172  vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c2222, vb4567c2));

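The gemminc, gemm, and igemm families above share this arithmetic verbatim; they differ only in where the A rows and the initial accumulator values come from. As a plain-C reference for what the two va1c2222 lines compute, here is a scalar equivalent under the same assumed packed-B layout as the sketch above:

    // Scalar equivalent of the two va1c2222 multiply-adds: a rank-1 update of
    // row 1's eight partial sums using the k = 2 element of A row 1. The
    // offset 16 for the c2 weight slice is the same illustrative assumption.
    void k2_step_row1_scalar(const float a1[4], const float w[32], float acc1[8]) {
      const float a = a1[2];          // the element that va1c2222 broadcasts
      for (int n = 0; n < 8; n++) {
        acc1[n] += a * w[16 + n];
      }
    }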