/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 3x8inc-minmax-sse-dup.c | 117 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup() local 124 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup() 127 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup()
|
D | 3x8inc-minmax-sse2-dup.c | 117 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup() local 124 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup() 127 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup()
|
D | 4x8inc-minmax-sse2-dup.c | 136 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() local 144 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() 148 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8inc-minmax-sse-dup.c | 136 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() local 144 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() 148 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup()
|
D | 5x8inc-minmax-sse-dup.c | 155 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() local 164 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() 169 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup()
|
D | 5x8inc-minmax-sse2-dup.c | 155 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() local 164 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 169 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 3x8-minmax-sse2-dup.c | 115 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup() local 122 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup() 125 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
|
D | 3x8-minmax-sse-dup.c | 115 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup() local 122 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup() 125 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c | 134 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() local 142 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() 146 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c | 134 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() local 142 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() 146 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup()
|
D | 5x8-minmax-sse-dup.c | 153 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() local 162 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() 167 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
|
D | 5x8-minmax-sse2-dup.c | 153 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() local 162 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 167 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 3x8-minmax-sse-dup.c | 134 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup() local 141 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup() 144 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup()
|
D | 3x8-minmax-sse2-dup.c | 134 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup() local 141 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup() 144 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c | 156 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() local 164 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() 168 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c | 156 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() local 164 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() 168 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup()
|
D | 5x8-minmax-sse2-dup.c | 178 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() local 187 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() 192 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup()
|
D | 5x8-minmax-sse-dup.c | 178 const __m128 va1c3333 = _mm_shuffle_ps(va1, va1, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() local 187 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() 192 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup()
|