/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 3x8inc-minmax-sse-dup.c |
    76   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup() local
    83   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup()
    86   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup()
|
D | 3x8inc-minmax-sse2-dup.c |
    76   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup() local
    83   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup()
    86   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup()
|
D | 4x8inc-minmax-sse2-dup.c |
    86   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() local
    94   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup()
    98   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8inc-minmax-sse-dup.c |
    86   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() local
    94   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup()
    98   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup()
|
D | 5x8inc-minmax-sse-dup.c |
    96   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() local
   105   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup()
   110   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup()
|
D | 5x8inc-minmax-sse2-dup.c |
    96   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() local
   105   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
   110   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
|
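Every match above is an instance of the same "dup" idiom: lane 0 of the A-register va2 is broadcast across all four lanes before the multiply-accumulate. The sse_dup kernels splat with a float shuffle, while the sse2_dup kernels round-trip through the integer domain, presumably to exercise different shuffle-unit behavior across microarchitectures. Below is a minimal standalone sketch, not XNNPACK code (the variable names merely mirror the listings), showing that both idioms yield the same broadcast vector:

    #include <stdio.h>
    #include <emmintrin.h>  /* SSE2; also provides the SSE intrinsics */

    int main(void) {
      /* Lane 0 holds 1.0f; that is the value both idioms broadcast. */
      const __m128 va2 = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);

      /* SSE idiom: broadcast lane 0 with a float shuffle. */
      const __m128 va2c0000_sse =
          _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));

      /* SSE2 idiom: reinterpret as integers, shuffle, reinterpret back. */
      const __m128 va2c0000_sse2 = _mm_castsi128_ps(
          _mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));

      float out[4];
      _mm_storeu_ps(out, va2c0000_sse);
      printf("sse : %g %g %g %g\n", out[0], out[1], out[2], out[3]);
      _mm_storeu_ps(out, va2c0000_sse2);
      printf("sse2: %g %g %g %g\n", out[0], out[1], out[2], out[3]);
      return 0;  /* both lines print: 1 1 1 1 */
    }
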
/external/XNNPACK/src/f32-gemm/gen/ |
D | 3x8-minmax-sse2-dup.c |
    74   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup() local
    81   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
    84   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
|
D | 3x8-minmax-sse-dup.c |
    74   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup() local
    81   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
    84   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c |
    84   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() local
    92   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup()
    96   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c |
    84   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() local
    92   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup()
    96   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup()
|
D | 5x8-minmax-sse-dup.c |
    94   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() local
   103   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
   108   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
|
D | 5x8-minmax-sse2-dup.c |
    94   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() local
   103   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
   108   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 3x8-minmax-sse-dup.c |
    93   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup() local
   100   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup()
   103   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup()
|
D | 3x8-minmax-sse2-dup.c |
    93   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup() local
   100   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup()
   103   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c |
   106   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() local
   114   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup()
   118   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c |
   106   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() local
   114   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup()
   118   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup()
|
D | 5x8-minmax-sse2-dup.c |
   119   const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0, 0, 0)));   in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() local
   128   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup()
   133   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup()
|
D | 5x8-minmax-sse-dup.c |
   119   const __m128 va2c0000 = _mm_shuffle_ps(va2, va2, _MM_SHUFFLE(0, 0, 0, 0));   in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() local
   128   vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2c0000, vb0123c0));   in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup()
   133   vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2c0000, vb4567c0));   in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup()
|
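Across all three directories the second and third match in each file are the consumer side of the broadcast: one element of the A panel multiplied against an 8-wide row of packed B, accumulated into two __m128 halves (there is no FMA at the SSE/SSE2 level, hence the separate _mm_mul_ps and _mm_add_ps). A minimal sketch of that step, with mac_dup_step as a hypothetical helper name rather than an XNNPACK symbol:

    #include <stdio.h>
    #include <xmmintrin.h>  /* SSE */

    /* One k-step of the dup-style MAC from the listings: broadcast lane 0
       of va, multiply by an 8-wide B row held as two __m128 halves, and
       accumulate. Hypothetical helper, not taken from XNNPACK. */
    static void mac_dup_step(__m128 *vacc0123, __m128 *vacc4567,
                             __m128 va, const float *w) {
      const __m128 vac0000 = _mm_shuffle_ps(va, va, _MM_SHUFFLE(0, 0, 0, 0));
      const __m128 vb0123c0 = _mm_loadu_ps(w);
      const __m128 vb4567c0 = _mm_loadu_ps(w + 4);
      *vacc0123 = _mm_add_ps(*vacc0123, _mm_mul_ps(vac0000, vb0123c0));
      *vacc4567 = _mm_add_ps(*vacc4567, _mm_mul_ps(vac0000, vb4567c0));
    }

    int main(void) {
      const float w[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      __m128 vacc0123 = _mm_setzero_ps();
      __m128 vacc4567 = _mm_setzero_ps();
      /* Only lane 0 of va participates; the 9s are deliberately ignored. */
      const __m128 va = _mm_setr_ps(2.0f, 9.0f, 9.0f, 9.0f);
      mac_dup_step(&vacc0123, &vacc4567, va, w);
      float out[8];
      _mm_storeu_ps(out, vacc0123);
      _mm_storeu_ps(out + 4, vacc4567);
      for (int i = 0; i < 8; i++) printf("%g ", out[i]);
      printf("\n");  /* expected: 2 4 6 8 10 12 14 16 */
      return 0;
    }

In the real kernels this step is unrolled once per row of the output tile (va0..va4 in the 5x8 variants) and once per element of the A register (c0000 through c3333), which is why each file reports three separate hits on va2c0000.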