/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x9-minmax-sse.c | 152 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 158 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 227 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 231 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 279 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 281 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
|
D | up8x9-wasmsimd.c | 150 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 156 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 223 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 227 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 274 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 276 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
|
D | up8x9-minmax-neon.c | 139 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 143 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 194 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 196 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 240 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 242 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
|
D | up8x9-minmax-sse-acc2.c | 152 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 158 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 230 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 234 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 284 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 286 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2()
|
D | up8x9-minmax-neonfma.c | 139 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 143 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 194 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 196 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 240 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 242 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
|
D | up8x9-minmax-wasmsimd-arm-acc2.c | 152 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 231 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 285 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 287 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2()
|
D | up8x9-minmax-wasmsimd-x86.c | 152 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 228 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 232 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 280 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 282 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
|
D | up8x9-minmax-neonfma-acc2.c | 139 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 143 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 197 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 199 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 245 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 247 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2()
|
D | up8x9-minmax-wasmsimd-arm.c | 152 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 228 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 232 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 280 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 282 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
|
D | up8x9-minmax-neon-acc2.c | 139 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 143 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 197 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 199 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 245 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 247 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2()
|
D | up4x9-minmax-wasmsimd-arm.c | 130 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 183 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 185 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
|
D | up4x9-wasmsimd.c | 128 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 179 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 181 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
|
D | up4x9-minmax-sse.c | 130 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 182 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
|
D | up4x9-minmax-neonfma.c | 117 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 119 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 163 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 165 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
|
D | up4x9-minmax-neon.c | 117 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 119 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 163 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 165 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
|
D | up8x9-minmax-wasmsimd-x86-acc2.c | 152 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 231 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 285 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 287 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-wasmsimd-x86-acc2.c | 130 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 134 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 185 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 187 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-neon-acc2.c | 117 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 119 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 165 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 167 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2()
|
D | up4x9-minmax-sse-acc2.c | 130 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 134 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 184 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 186 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2()
|
D | up4x9-minmax-wasmsimd-x86.c | 130 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 183 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 185 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
|
D | up4x9-minmax-neonfma-acc2.c | 117 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 119 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 165 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 167 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2()
|
D | up4x9-minmax-wasmsimd-arm-acc2.c | 130 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 134 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 185 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 187 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2()
|
D | up4x25-minmax-wasmsimd-arm-acc2.c | 210 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() local 214 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 361 const v128_t vi7x0123 = wasm_v128_load(i7); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() local 363 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2()
|
D | up4x25-minmax-sse.c | 210 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 214 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 358 const __m128 vi7x0123 = _mm_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 360 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
|
D | up4x25-minmax-neon-acc2.c | 197 const float32x4_t vi7x0123 = vld1q_f32(i7); i7 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() local 199 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 309 const float32x4_t vi7x0123 = vld1q_f32(i7); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() local 311 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2()
|