/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x8-minmax-sse-dup.c | 76 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_1x8__sse_dup() local 81 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_1x8__sse_dup() 82 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_1x8__sse_dup()
|
D | 1x8-minmax-sse2-dup.c | 76 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup() local 81 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup() 82 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup()
|
D | 3x8-minmax-sse2-dup.c | 114 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup() local 121 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup() 124 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
|
D | 3x8-minmax-sse-dup.c | 114 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup() local 121 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup() 124 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c | 133 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() local 141 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup() 145 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c | 133 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() local 141 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup() 145 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup()
|
D | 5x8-minmax-sse-dup.c | 152 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() local 161 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() 166 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
|
D | 5x8-minmax-sse2-dup.c | 152 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() local 161 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 166 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 1x8inc-minmax-sse-dup.c | 78 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup() local 83 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup() 84 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup()
|
D | 1x8inc-minmax-sse2-dup.c | 78 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup() local 83 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup() 84 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup()
|
D | 3x8inc-minmax-sse-dup.c | 116 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup() local 123 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup() 126 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup()
|
D | 3x8inc-minmax-sse2-dup.c | 116 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup() local 123 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup() 126 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup()
|
D | 4x8inc-minmax-sse2-dup.c | 135 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() local 143 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup() 147 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup()
|
D | 4x8inc-minmax-sse-dup.c | 135 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() local 143 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup() 147 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup()
|
D | 5x8inc-minmax-sse-dup.c | 154 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() local 163 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup() 168 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup()
|
D | 5x8inc-minmax-sse2-dup.c | 154 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() local 163 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 168 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x8-minmax-sse-dup.c | 89 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_1x8__sse_dup() local 94 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_1x8__sse_dup() 95 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_1x8__sse_dup()
|
D | 1x8-minmax-sse2-dup.c | 89 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup() local 94 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup() 95 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup()
|
D | 3x8-minmax-sse-dup.c | 133 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup() local 140 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup() 143 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup()
|
D | 3x8-minmax-sse2-dup.c | 133 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup() local 140 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup() 143 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup()
|
D | 4x8-minmax-sse-dup.c | 155 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() local 163 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup() 167 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup()
|
D | 4x8-minmax-sse2-dup.c | 155 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() local 163 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup() 167 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup()
|
D | 5x8-minmax-sse2-dup.c | 177 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() local 186 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup() 191 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup()
|
D | 5x8-minmax-sse-dup.c | 177 const __m128 va0c3333 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(3, 3, 3, 3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() local 186 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c3333, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup() 191 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup()
|