/external/XNNPACK/src/f32-gemm/gen/
4x2c4-wasmsimd.c, in xnn_f32_gemm_ukernel_4x2c4__wasmsimd():
  109  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  112  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  114  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  116  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  118  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

4x2c4-minmax-wasmsimd-arm.c, in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm():
  111  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  114  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  116  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  118  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  120  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

4x2c4-minmax-sse.c, in xnn_f32_gemm_minmax_ukernel_4x2c4__sse():
  108  const __m128 vmask0 = _mm_cmpeq_ps(_mm_setzero_ps(), vb0);  (local)
  111  vacc0x0c4 = _mm_add_ps(vacc0x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va0), vb0));
  113  vacc1x0c4 = _mm_add_ps(vacc1x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va1), vb0));
  115  vacc2x0c4 = _mm_add_ps(vacc2x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va2), vb0));
  117  vacc3x0c4 = _mm_add_ps(vacc3x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va3), vb0));

4x2c4-relu-wasmsimd.c, in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd():
  109  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  112  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  114  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  116  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  118  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

4x2c4-minmax-wasmsimd-x86.c, in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86():
  109  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  112  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  114  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  116  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  118  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

/external/XNNPACK/src/f32-igemm/gen/
4x2c4-wasmsimd.c, in xnn_f32_igemm_ukernel_4x2c4__wasmsimd():
  127  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  130  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  132  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  134  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  136  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

4x2c4-relu-wasmsimd.c, in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd():
  127  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  130  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  132  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  134  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  136  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

4x2c4-minmax-sse.c, in xnn_f32_igemm_minmax_ukernel_4x2c4__sse():
  126  const __m128 vmask0 = _mm_cmpeq_ps(_mm_setzero_ps(), vb0);  (local)
  129  vacc0x0c4 = _mm_add_ps(vacc0x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va0), vb0));
  131  vacc1x0c4 = _mm_add_ps(vacc1x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va1), vb0));
  133  vacc2x0c4 = _mm_add_ps(vacc2x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va2), vb0));
  135  vacc3x0c4 = _mm_add_ps(vacc3x0c4, _mm_mul_ps(_mm_andnot_ps(vmask0, va3), vb0));

4x2c4-minmax-wasmsimd-x86.c, in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86():
  127  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  130  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  132  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  134  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  136  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

4x2c4-minmax-wasmsimd-arm.c, in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm():
  129  const v128_t vmask0 = wasm_f32x4_eq(vb0, vzero);  (local)
  132  vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0));
  134  vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0));
  136  vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0));
  138  vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0));

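Every match above is the same guard, repeated across the generated GEMM/IGEMM variants: the weight vector vb0 is zero-padded past the end of the reduction dimension, while the corresponding activation lanes (va0..va3) may hold arbitrary leftover data. Since 0.0f * NaN and 0.0f * Inf both yield NaN under IEEE 754, a plain multiply-add could poison the accumulator, which is presumably why the kernels build vmask0 = (vb0 == 0) and clear the masked activation lanes with an andnot before the multiply. Below is a minimal self-contained sketch of the same trick using the SSE intrinsics from the *-sse.c entries; the helper name masked_madd and the test driver are illustrative assumptions, not XNNPACK code.

  #include <math.h>
  #include <stdio.h>
  #include <xmmintrin.h>

  /* acc += a * b, except that lanes where b == 0 contribute exactly 0.
   * Zero-padded weight lanes can face stale activation lanes, and
   * 0.0f * NaN (or 0.0f * Inf) is NaN, so those lanes are cleared from
   * the activations before the multiply, as in the kernels listed above. */
  static __m128 masked_madd(__m128 acc, __m128 a, __m128 b) {
    const __m128 vmask = _mm_cmpeq_ps(_mm_setzero_ps(), b);  /* all-ones where b == 0 */
    return _mm_add_ps(acc, _mm_mul_ps(_mm_andnot_ps(vmask, a), b));
  }

  int main(void) {
    /* Lane 3 of vb is padding; lane 3 of va is deliberately poisoned. */
    const __m128 va = _mm_setr_ps(1.0f, 2.0f, 3.0f, NAN);
    const __m128 vb = _mm_setr_ps(4.0f, 5.0f, 6.0f, 0.0f);
    __m128 vacc = masked_madd(_mm_setzero_ps(), va, vb);

    float out[4];
    _mm_storeu_ps(out, vacc);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 4 10 18 0 */
    return 0;
  }

Note the operand order in the two instruction sets: _mm_andnot_ps(vmask0, va0) computes (~vmask0) & va0, while wasm_v128_andnot(va0, vmask0) computes va0 & ~vmask0, so the mask sits on opposite sides even though the listed kernels compute the same thing.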