/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasm-rr2-p6-x6.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() local 52 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() 53 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() 54 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() 55 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() 56 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() 57 …const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() 181 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
|
D | velu-scalar-rr2-lut16-p3-x6.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local 99 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() 104 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() 109 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() 114 if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() 119 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() 124 if XNN_UNPREDICTABLE(vz5 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() 218 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
|
D | velu-wasm-rr2-lut16-p3-x6.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local 52 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() 53 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() 54 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() 55 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() 56 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() 57 …const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() 171 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
|
D | velu-wasm-rr2-p6-x5.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() local 51 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() 52 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() 53 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() 54 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() 55 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() 162 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
|
D | velu-scalar-rr2-lut16-p3-x5.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local 91 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() 96 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() 101 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() 106 if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() 111 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() 194 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
|
D | velu-wasm-rr2-lut16-p3-x5.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local 51 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() 52 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() 53 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() 54 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() 55 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() 153 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
|
D | velu-scalar-rr2-p6-x6.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local 93 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() 97 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() 101 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() 105 if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() 109 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() 113 if XNN_UNPREDICTABLE(vz5 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() 226 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
|
D | velu-scalar-rr2-p6-x5.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() local 86 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() 90 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() 94 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() 98 if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() 102 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() 201 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
|
D | velu-wasm-rr2-p6-x4.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() local 50 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() 51 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() 52 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() 53 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() 143 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
|
D | velu-scalar-rr2-lut16-p3-x4.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() local 83 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() 88 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() 93 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() 98 if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() 170 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
|
D | velu-wasm-rr2-lut16-p3-x4.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() local 50 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() 51 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() 52 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() 53 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() 135 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
|
D | velu-scalar-rr2-p6-x4.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() local 79 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() 83 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() 87 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() 91 if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() 176 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
|
D | velu-avx512f-rr1-lut16-p3-perm-x128.c | 32 const __m512 vsat_cutoff = _mm512_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() local 53 const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 54 const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 55 const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 56 const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 57 const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 58 const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 59 const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 60 const __m512 vz7 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 199 const __m512 vz = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() [all …]
|
D | velu-scalar-rr2-p6-x3.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x3() local 72 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x3() 76 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x3() 80 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x3() 151 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
|
D | velu-wasm-rr2-p6-x3.c | 33 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_p6_x3() local 49 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x3() 50 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x3() 51 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x3() 124 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
|
D | velu-scalar-rr2-lut16-p3-x3.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() local 75 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() 80 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() 85 if XNN_UNPREDICTABLE(vz2 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() 146 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
|
D | velu-wasm-rr2-lut16-p3-x3.c | 36 const float vsat_cutoff = -0x1.154246p+4f; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() local 49 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() 50 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() 51 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() 117 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
|
D | velu-avx2-rr1-lut8-p4-perm-x72.c | 32 const __m256 vsat_cutoff = _mm256_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() local 54 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 55 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 56 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 57 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 58 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 59 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 61 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() 62 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() [all …]
|
D | velu-avx2-rr1-lut4-p4-perm-x72.c | 32 const __m256 vsat_cutoff = _mm256_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() local 55 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 56 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 57 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 58 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 59 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 60 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 62 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() 63 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() [all …]
|
D | velu-avx2-rr1-p6-x72.c | 32 const __m256 vsat_cutoff = _mm256_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() local 54 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 55 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 56 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 57 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 58 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 59 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 61 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() 62 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() [all …]
|
D | velu-avx2-rr1-lut16-p3-gather-x72.c | 34 const __m256 vsat_cutoff = _mm256_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local 54 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 55 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 56 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 57 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 58 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 59 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 61 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() 62 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() [all …]
|
D | velu-avx2-rr1-lut4-p4-perm-x80.c | 32 const __m256 vsat_cutoff = _mm256_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() local 56 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 57 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 58 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 59 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 60 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 61 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 62 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 63 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() 64 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() [all …]
|
D | velu-avx2-rr1-lut16-p3-gather-x80.c | 34 const __m256 vsat_cutoff = _mm256_set1_ps(-0x1.154246p+4f); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() local 55 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 56 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 57 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 58 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 59 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 60 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 62 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() 63 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() [all …]
|
/external/XNNPACK/src/f32-velu/ |
D | scalar-rr2-lut16-p3.c.in | 33 const float vsat_cutoff = -0x1.154246p+4f; 48 …oat vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff), 0.0f); 67 if XNN_UNPREDICTABLE(vz${N} <= vsat_cutoff) { 110 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 124 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 155 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 169 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 199 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 213 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) {
|
D | scalar-rr2-p6.c.in | 30 const float vsat_cutoff = -0x1.154246p+4f; 48 …oat vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff), 0.0f); 67 if XNN_UNPREDICTABLE(vz${N} <= vsat_cutoff) { 119 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 131 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 165 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 177 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 210 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 222 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) {
|