/external/XNNPACK/src/u8-clamp/

D | scalar-x4.c | in xnn_u8_clamp_ukernel__scalar_x4():
      26  uint8_t vt3 = x[3];   (local)
      32  vt3 = XNN_UNPREDICTABLE(vt3 < voutput_min) ? voutput_min : vt3;
      37  vt3 = XNN_UNPREDICTABLE(vt3 > voutput_max) ? voutput_max : vt3;
      42  y[3] = vt3;

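All four hits are lane 3 of the 4x-unrolled scalar clamp: load, clamp against the
minimum, clamp against the maximum, store. A minimal single-lane sketch of that
pattern (illustrative, not the verbatim kernel; XNN_UNPREDICTABLE is XNNPACK's
branch-prediction hint, reduced here to a plain ternary):

    #include <stdint.h>

    /* Single-lane sketch of the clamp pattern above. */
    static inline uint8_t clamp_lane_u8(uint8_t vt, uint8_t voutput_min, uint8_t voutput_max) {
      vt = (vt < voutput_min) ? voutput_min : vt;  /* cf. source line 32 */
      vt = (vt > voutput_max) ? voutput_max : vt;  /* cf. source line 37 */
      return vt;                                   /* stored to y[3] on line 42 */
    }
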
/external/XNNPACK/src/f32-velu/gen/

D | velu-scalar-rr2-p6-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x4():
      72  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      77  vt3 = vn3 * vminus_ln2_lo + vt3;
      93  vt3 = 0.0f;
      99  float vp3 = vc6 * vt3 + vc5;
     104  vp3 = vp3 * vt3 + vc4;
     109  vp3 = vp3 * vt3 + vc3;
     114  vp3 = vp3 * vt3 + vc2;
     119  vp3 *= vt3;
     127  vt3 *= vs3;
     133  vp3 = vp3 * vt3 + vt3;

D | velu-wasm-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x4():
      72  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      77  vt3 = vn3 * vminus_ln2_lo + vt3;
      83  float vp3 = vc6 * vt3 + vc5;
      88  vp3 = vp3 * vt3 + vc4;
      93  vp3 = vp3 * vt3 + vc3;
      98  vp3 = vp3 * vt3 + vc2;
     103  vp3 *= vt3;
     111  vt3 *= vs3;
     117  vp3 = vp3 * vt3 + vt3;

D | velu-scalar-rr2-p6-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x5():
      77  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      83  vt3 = vn3 * vminus_ln2_lo + vt3;
     100  vt3 = 0.0f;
     110  float vp3 = vc6 * vt3 + vc5;
     116  vp3 = vp3 * vt3 + vc4;
     122  vp3 = vp3 * vt3 + vc3;
     128  vp3 = vp3 * vt3 + vc2;
     134  vp3 *= vt3;
     143  vt3 *= vs3;
     151  vp3 = vp3 * vt3 + vt3;

D | velu-wasm-rr2-p6-x5.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x5():
      77  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      83  vt3 = vn3 * vminus_ln2_lo + vt3;
      90  float vp3 = vc6 * vt3 + vc5;
      96  vp3 = vp3 * vt3 + vc4;
     102  vp3 = vp3 * vt3 + vc3;
     108  vp3 = vp3 * vt3 + vc2;
     114  vp3 *= vt3;
     123  vt3 *= vs3;
     131  vp3 = vp3 * vt3 + vt3;

D | velu-scalar-rr2-p6-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
      82  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      89  vt3 = vn3 * vminus_ln2_lo + vt3;
     107  vt3 = 0.0f;
     121  float vp3 = vc6 * vt3 + vc5;
     128  vp3 = vp3 * vt3 + vc4;
     135  vp3 = vp3 * vt3 + vc3;
     142  vp3 = vp3 * vt3 + vc2;
     149  vp3 *= vt3;
     159  vt3 *= vs3;
     169  vp3 = vp3 * vt3 + vt3;

D | velu-wasm-rr2-p6-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x6():
      82  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      89  vt3 = vn3 * vminus_ln2_lo + vt3;
      97  float vp3 = vc6 * vt3 + vc5;
     104  vp3 = vp3 * vt3 + vc4;
     111  vp3 = vp3 * vt3 + vc3;
     118  vp3 = vp3 * vt3 + vc2;
     125  vp3 *= vt3;
     135  vt3 *= vs3;
     145  vp3 = vp3 * vt3 + vt3;

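The scalar/wasm rr2-p6 kernels above are 4x/5x/6x unrollings of one ELU
evaluation: round z*log2(e) to an integer n with a magic-bias add, rebuild
s = 2^n from the exponent bits, reduce t = z - n*ln2 in two (hi/lo) steps, and
evaluate a degree-6 polynomial. Note the extra `vt3 = 0.0f` hits in the scalar
files: that is the saturation path for very negative z, which the wasm variants
avoid by clamping z up front. A single-element sketch under stated assumptions
(prescale = beta = 1; Taylor coefficients stand in for XNNPACK's minimax-tuned
hexfloat constants):

    #include <stdint.h>
    #include <string.h>

    static inline uint32_t fp32_to_bits(float f) { uint32_t w; memcpy(&w, &f, sizeof w); return w; }
    static inline float fp32_from_bits(uint32_t w) { float f; memcpy(&f, &w, sizeof f); return f; }

    float elu_rr2_p6(float vx, float valpha) {
      const float vz = (vx < 0.0f) ? vx : 0.0f;      /* exp path only matters for x < 0 */
      /* n = round(z*log2(e)); the bias's low bits hold 127, so << 23 yields 2^n. */
      float vn = vz * 1.44269504f + 0x1.8000FEp23f;
      float vs = fp32_from_bits(fp32_to_bits(vn) << 23);
      vn -= 0x1.8000FEp23f;
      /* Two-step Cody-Waite reduction, cf. the hi/lo lines above. */
      float vt = vn * -0x1.62E400p-1f + vz;
      vt = vn * -0x1.7F7D1Cp-20f + vt;
      if (vz <= -0x1.154246p+4f) { vs = 0.0f; vt = 0.0f; }  /* saturation, cf. "vt3 = 0.0f" */
      /* Degree-6 Horner, cf. the vc6..vc2 lines above. */
      float vp = (1.0f/720.0f) * vt + (1.0f/120.0f);
      vp = vp * vt + (1.0f/24.0f);
      vp = vp * vt + (1.0f/6.0f);
      vp = vp * vt + 0.5f;
      vp *= vt;
      vt *= vs;
      vs -= 1.0f;
      vp = vp * vt + vt;                    /* = s*(e^t - 1)       */
      const float ve = (vp + vs) * valpha;  /* = alpha*(e^z - 1)   */
      return (vx < 0.0f) ? ve : vx;
    }
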
D | velu-scalar-rr2-lut16-p3-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4():
      79  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      97  vt3 = vn3 * vminus_ln2_lo + vt3;
     100  vt3 = 0.0f;
     106  float vp3 = vc3 * vt3 + vc2;
     111  vp3 *= vt3;
     119  vt3 *= vs3;
     125  vp3 = vp3 * vt3 + vt3;

D | velu-scalar-rr2-lut16-p3-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5():
      85  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
     105  vt3 = vn3 * vminus_ln2_lo + vt3;
     108  vt3 = 0.0f;
     119  float vp3 = vc3 * vt3 + vc2;
     125  vp3 *= vt3;
     134  vt3 *= vs3;
     142  vp3 = vp3 * vt3 + vt3;

D | velu-wasm-rr2-lut16-p3-x4.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4():
      79  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      85  vt3 = vn3 * vminus_ln2_lo + vt3;
      90  float vp3 = vc3 * vt3 + vc2;
      95  vp3 *= vt3;
     103  vt3 *= vs3;
     109  vp3 = vp3 * vt3 + vt3;

D | velu-scalar-rr2-lut16-p3-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6():
      91  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
     113  vt3 = vn3 * vminus_ln2_lo + vt3;
     116  vt3 = 0.0f;
     132  float vp3 = vc3 * vt3 + vc2;
     139  vp3 *= vt3;
     149  vt3 *= vs3;
     159  vp3 = vp3 * vt3 + vt3;

D | velu-wasm-rr2-lut16-p3-x5.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5():
      85  float vt3 = vn3 * vminus_ln2_hi + vz3;   (local)
      93  vt3 = vn3 * vminus_ln2_lo + vt3;
      99  float vp3 = vc3 * vt3 + vc2;
     105  vp3 *= vt3;
     114  vt3 *= vs3;
     122  vp3 = vp3 * vt3 + vt3;

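The lut16-p3 variants trade polynomial degree for a 16-entry table: n is kept in
sixteenths, the low 4 bits select a tabulated 2^(j/16) (the kernels index
xnn_table_exp2minus_k_over_16), and the integer part lands in the exponent
field. That leaves |t| <= ln2/32 ~= 0.022, small enough for the short vc3/vc2
Horner pair seen in every entry above. An illustrative reconstruction of the
scale computation, using explicit integer arithmetic in place of XNNPACK's
magic-bias bit trick and exp2f in place of the table:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* s = 2^(m/16) for m = round(16 * z * log2(e)), z <= 0 and above the cutoff. */
    static float scale_lut16(float z) {
      int m = (int)lrintf(z * 16.0f * 1.44269504f);
      int j = m & 15;                        /* low 4 bits: table index */
      int e = (m - j) >> 4;                  /* integer part: m = 16*e + j */
      float base = exp2f((float)j / 16.0f);  /* stand-in for the table entry */
      uint32_t bits; memcpy(&bits, &base, sizeof bits);
      bits += (uint32_t)e << 23;             /* multiply by 2^e via the exponent field */
      float s; memcpy(&s, &bits, sizeof s);
      return s;
    }
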
D | velu-avx512f-rr1-p6-x64.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64():
      71  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vz3);   (local)
      76  __m512 vp3 = _mm512_fmadd_ps(vc6, vt3, vc5);
      81  vp3 = _mm512_fmadd_ps(vp3, vt3, vc4);
      86  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
      91  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);
      99  vp3 = _mm512_mul_ps(vp3, vt3);
     100  vt3 = _mm512_mul_ps(vt3, vs3);
     110  vp3 = _mm512_fmadd_ps(vp3, vt3, vt3);

D | velu-avx2-rr1-p6-x32.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x32():
      71  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3);   (local)
      76  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
      81  vp3 = _mm256_fmadd_ps(vp3, vt3, vc4);
      86  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);
      91  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);
      99  vp3 = _mm256_mul_ps(vp3, vt3);
     100  vt3 = _mm256_mul_ps(vt3, vs3);
     109  vp3 = _mm256_fmadd_ps(vp3, vt3, vt3);

D | velu-avx-rr2-p6-x32.c | in xnn_f32_velu_ukernel__avx_rr2_p6_x32():
      80  __m256 vt3 = _mm256_add_ps(_mm256_mul_ps(vn3, vminus_ln2_hi), vz3);   (local)
      86  vt3 = _mm256_add_ps(_mm256_mul_ps(vn3, vminus_ln2_lo), vt3);
      91  __m256 vp3 = _mm256_add_ps(_mm256_mul_ps(vc6, vt3), vc5);
      96  vp3 = _mm256_add_ps(_mm256_mul_ps(vp3, vt3), vc4);
     101  vp3 = _mm256_add_ps(_mm256_mul_ps(vp3, vt3), vc3);
     106  vp3 = _mm256_add_ps(_mm256_mul_ps(vp3, vt3), vc2);
     111  vp3 = _mm256_mul_ps(vp3, vt3);
     119  vt3 = _mm256_mul_ps(vt3, vs3);
     125  vp3 = _mm256_add_ps(_mm256_mul_ps(vp3, vt3), vt3);

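The x86 entries differ in the reduction, as the rr1/rr2 file names say: with FMA
(avx2-rr1, avx512f-rr1) each multiply-add rounds once, so a single full-precision
ln2 constant suffices; plain AVX has no FMA, so it keeps the hi/lo split and
spells every step as an explicit mul+add. Side by side (argument registers
assumed loaded; names illustrative):

    #include <immintrin.h>

    /* rr1: one fused step (AVX-512 shown; the AVX2 form is identical with __m256). */
    __m512 reduce_rr1(__m512 vn, __m512 vz, __m512 vminus_ln2) {
      return _mm512_fmadd_ps(vn, vminus_ln2, vz);   /* t = z - n*ln2, single rounding */
    }

    /* rr2: hi/lo split, two unfused mul+add pairs (plain AVX). */
    __m256 reduce_rr2(__m256 vn, __m256 vz, __m256 vminus_ln2_hi, __m256 vminus_ln2_lo) {
      __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz);
      return _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_lo), vt);
    }
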
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | scalar-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4():
      84  float vt3 = vn3 * vminus_ln2_hi + vx3;   (local)
      89  vt3 = vn3 * vminus_ln2_lo + vt3;
      95  float vp3 = vc5 * vt3 + vc4;
     100  vp3 = vp3 * vt3 + vc3;
     105  vp3 = vp3 * vt3 + vc2;
     110  vp3 = vp3 * vt3 + vc1;
     119  vt3 *= vs3;
     124  float vf3 = vt3 * vp3 + vs3;

D | scalar-p5-x4-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2():
      85  float vt3 = vn3 * vminus_ln2_hi + vx3;   (local)
      90  vt3 = vn3 * vminus_ln2_lo + vt3;
      96  float vp3 = vc5 * vt3 + vc4;
     101  vp3 = vp3 * vt3 + vc3;
     106  vp3 = vp3 * vt3 + vc2;
     111  vp3 = vp3 * vt3 + vc1;
     120  vt3 *= vs3;
     125  float vf3 = vt3 * vp3 + vs3;

D | scalar-p5-x4-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4():
      87  float vt3 = vn3 * vminus_ln2_hi + vx3;   (local)
      92  vt3 = vn3 * vminus_ln2_lo + vt3;
      98  float vp3 = vc5 * vt3 + vc4;
     103  vp3 = vp3 * vt3 + vc3;
     108  vp3 = vp3 * vt3 + vc2;
     113  vp3 = vp3 * vt3 + vc1;
     122  vt3 *= vs3;
     127  float vf3 = vt3 * vp3 + vs3;

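These kernels compute f = e^(x - max) per element, accumulate the running sum
(the -acc2/-acc4 variants keep 2 or 4 partial sums to break the add dependency
chain), and store f. A hedged single-element sketch of the evaluation, reusing
the fp32 helpers from the ELU sketch above (Taylor coefficients again stand in
for the tuned constants, and the denormal-cutoff flush of tiny f is omitted):

    /* vx already holds x - max, so vx <= 0 and e^vx cannot overflow. */
    float exp_p5(float vx) {
      float vn = vx * 1.44269504f + 0x1.8000FEp23f;      /* n = round(x*log2(e)) */
      float vs = fp32_from_bits(fp32_to_bits(vn) << 23); /* s = 2^n */
      vn -= 0x1.8000FEp23f;
      float vt = vn * -0x1.62E400p-1f + vx;              /* hi/lo reduction */
      vt = vn * -0x1.7F7D1Cp-20f + vt;
      float vp = (1.0f/120.0f) * vt + (1.0f/24.0f);      /* p5 Horner, vc5..vc1 */
      vp = vp * vt + (1.0f/6.0f);
      vp = vp * vt + 0.5f;
      vp = vp * vt + 1.0f;                               /* vc1 ~= 1 */
      vt *= vs;
      return vt * vp + vs;                               /* f = s*(1 + t*p(t)) ~= e^vx */
    }
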
D | scalar-lut64-p2-x4-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2():
     107  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;   (local)
     112  vt3 = vn3 * vminus_ln2_o64_lo + vt3;
     118  float vp3 = vt3 * vc2;
     123  vp3 = vp3 * vt3 + vt3;

D | scalar-lut64-p2-x4-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4():
     109  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;   (local)
     114  vt3 = vn3 * vminus_ln2_o64_lo + vt3;
     120  float vp3 = vt3 * vc2;
     125  vp3 = vp3 * vt3 + vt3;

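The lut64-p2 variants push the table to 64 entries; n is kept in 64ths (hence
the vminus_ln2_o64 constants), |t| shrinks to at most ln2/128 ~= 0.0054, and a
degree-2 polynomial is enough, which is why these hit lists are so short. The
per-element tail, sketched (the final combine is not among the vt3 hits above,
but it follows the same s*(1 + p) pattern as the p5 kernels):

    float vp = vt * vc2;      /* vc2 ~= 0.5 */
    vp = vp * vt + vt;        /* p = t + vc2*t^2 */
    float vf = vp * vs + vs;  /* f = s*(1 + t + vc2*t^2) ~= s*e^t */
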
/external/XNNPACK/src/f32-sigmoid/gen/

D | scalar-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4():
      69  float vt3 = vn3 * vln2_hi + vz3;   (local)
      74  vt3 = vn3 * vln2_lo + vt3;
      79  float vp3 = vt3 * vc5 + vc4;
      84  vp3 = vt3 * vp3 + vc3;
      89  vp3 = vt3 * vp3 + vc2;
      94  vp3 = vt3 * vp3 + vc1;
      99  vt3 *= vs3;
     104  const float ve3 = vt3 * vp3 + vs3;

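The sigmoid kernel evaluates e = exp(-|x|) with the same p5 scheme, then
finishes with one division and a reflection. Note the positive vln2_hi/vln2_lo
here: n is computed from -log2(e) on z = |x|, so the reduction adds n*ln2 back.
A hedged sketch of the tail that follows the ve3 line above:

    const float ve = vt * vp + vs;   /* e ~= exp(-|x|), cf. line 104 */
    const float vd = ve + 1.0f;      /* denominator: e + 1 */
    float vf = ve / vd;              /* sigmoid(-|x|) = e / (e + 1) */
    if (vx > 0.0f) vf = 1.0f - vf;   /* reflect: sigmoid(x) = 1 - sigmoid(-x) */
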
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x32.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32():
      83  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);   (local)
      88  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);
      94  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);
      99  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);
     104  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);
     109  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);
     118  vt3 = _mm256_mul_ps(vt3, vs3);
     123  __m256 vf3 = _mm256_fmadd_ps(vt3, vp3, vs3);

D | avx2-p5-x40.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40():
      88  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);   (local)
      94  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     101  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);
     107  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);
     113  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);
     119  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);
     129  vt3 = _mm256_mul_ps(vt3, vs3);
     135  __m256 vf3 = _mm256_fmadd_ps(vt3, vp3, vs3);

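The avx2-p5 kernels are the same p5 evaluation in 8-lane registers with every
mul+add fused; the "vscale" part is a final multiply before the store. One
8-lane slice, hedged (constant registers assumed already loaded; the x32/x40
kernels unroll this 4 and 5 times):

    __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
    vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
    __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);  /* p5 Horner, vc5..vc1 */
    vp = _mm256_fmadd_ps(vp, vt, vc3);
    vp = _mm256_fmadd_ps(vp, vt, vc2);
    vp = _mm256_fmadd_ps(vp, vt, vc1);
    vt = _mm256_mul_ps(vt, vs);
    __m256 vf = _mm256_fmadd_ps(vt, vp, vs);    /* f = s*(1 + t*p) */
    vf = _mm256_mul_ps(vf, vscale);             /* apply the output scale */
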
D | avx512f-p5-scalef-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64():
      66  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);   (local)
      71  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
      77  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
      82  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
      87  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);
      92  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);
      97  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);

/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x64.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64():
      61  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);   (local)
      66  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
      72  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
      77  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
      82  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);
      87  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);
      92  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);

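Both avx512f-p5-scalef kernels (vscaleexpminusmax and vscaleextexp) replace the
exponent-bit trick with _mm512_scalef_ps, which multiplies each lane by
2^floor(n) directly. That is why their polynomials run all the way down to vc0,
and why no vs3/vt3 scale multiply appears in these hit lists. A hedged sketch of
the tail:

    #include <immintrin.h>

    /* p(t) evaluated to c0, then scaled by 2^n in one instruction. */
    __m512 exp_p5_scalef(__m512 vt, __m512 vn,
                         __m512 vc5, __m512 vc4, __m512 vc3,
                         __m512 vc2, __m512 vc1, __m512 vc0) {
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);  /* cf. the vc0 hits above */
      return _mm512_scalef_ps(vp, vn);    /* f = p(t) * 2^n ~= e^x */
    }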