
Searched refs:vsign (Results 1 – 25 of 28) sorted by relevance

/external/XNNPACK/src/f32-vlrelu/
avx512f.c.in
  37  const __mmask16 vsign${ABC[N:N+16]} = _mm512_cmp_ps_mask(vacc${ABC[N:N+16]}, vzero, _CMP_LT_OQ);
  40  vacc${ABC[N:N+16]} = _mm512_mask_mul_ps(vacc${ABC[N:N+16]}, vsign${ABC[N:N+16]}, vacc${ABC[N:N+16]}, vslope);
  51  const __mmask16 vsign = _mm512_cmp_ps_mask(vacc, vzero, _CMP_LT_OQ);
  52  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
  64  const __mmask16 vsign = _mm512_mask_cmp_ps_mask(vmask, vacc, vzero, _CMP_LT_OQ);
  65  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
/external/XNNPACK/src/f32-vlrelu/gen/
vlrelu-avx512f-x32.c  (in xnn_f32_vlrelu_ukernel__avx512f_x32())
  49  const __mmask16 vsign = _mm512_cmp_ps_mask(vacc, vzero, _CMP_LT_OQ);
  50  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
  62  const __mmask16 vsign = _mm512_mask_cmp_ps_mask(vmask, vacc, vzero, _CMP_LT_OQ);
  63  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
vlrelu-avx512f-x16.c  (in xnn_f32_vlrelu_ukernel__avx512f_x16())
  50  const __mmask16 vsign = _mm512_mask_cmp_ps_mask(vmask, vacc, vzero, _CMP_LT_OQ);
  51  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
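All of these vlrelu hits share a two-instruction LeakyReLU: a compare against zero yields a 16-lane mask, and a masked multiply scales only the negative lanes by the slope. Below is a minimal standalone sketch of that main loop; the function name and the bare loop are illustrative, not XNNPACK's, and the real ukernels also handle the sub-16-element remainder with the _mm512_mask_cmp_ps_mask form seen in the tail lines above. Compile with -mavx512f.

    #include <immintrin.h>
    #include <stddef.h>

    /* Illustrative sketch of the vlrelu main-loop pattern. */
    static void leaky_relu_f32(const float* x, float* y, size_t n, float slope) {
      const __m512 vslope = _mm512_set1_ps(slope);
      const __m512 vzero = _mm512_setzero_ps();
      for (size_t i = 0; i + 16 <= n; i += 16) {
        __m512 vacc = _mm512_loadu_ps(x + i);
        /* Mask bit is set where the lane is strictly below zero (ordered). */
        const __mmask16 vsign = _mm512_cmp_ps_mask(vacc, vzero, _CMP_LT_OQ);
        /* Negative lanes become vacc * vslope; other lanes pass through. */
        vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
        _mm512_storeu_ps(y + i, vacc);
      }
    }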
/external/XNNPACK/src/f32-velu/gen/
velu-avx512f-rr1-lut16-p3-perm-x16.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16())
  47  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
  65  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
  80  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
  98  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x16.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16())
  47  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
  66  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
  81  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 100  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x32.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32())
  98  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 116  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 131  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 149  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x32.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32())
 102  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 121  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 136  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 155  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x48.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48())
 115  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 133  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 148  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 166  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x48.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48())
 120  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 139  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 154  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 173  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x64.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64())
 138  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 157  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 172  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 191  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x64.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64())
 132  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 150  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 165  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 183  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x80.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80())
 149  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 167  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 182  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 200  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x80.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80())
 156  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 175  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 190  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 209  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x96.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96())
 166  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 184  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 199  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 217  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x96.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96())
 174  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 193  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 208  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 227  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x112.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112())
 183  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 201  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 216  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 234  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x112.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112())
 192  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 211  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 226  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 245  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-lut16-p3-perm-x128.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128())
 200  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 218  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 233  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 251  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
velu-avx512f-rr1-p6-x128.c  (in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128())
 210  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 229  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 244  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 263  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
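In the velu kernels the same masked multiply implements the final ELU blend. By the time these lines run, vy already holds the negative-branch result (alpha * (exp(x) - 1), however the kernel computed it) in every lane; _CMP_NLT_US selects the lanes where x is NOT less than zero (NaN included), and the masked multiply overwrites exactly those lanes with beta * x. A minimal sketch, with an illustrative wrapper name:

    #include <immintrin.h>

    /* Illustrative sketch of the velu final-blend step. */
    static __m512 elu_blend(__m512 vx, __m512 vy, __m512 vbeta) {
      const __mmask16 vsign =
          _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
      /* Selected lanes (x >= 0 or NaN): vx * vbeta. Others: keep vy. */
      return _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
    }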
/external/XNNPACK/src/f32-prelu/
avx512f.c.in
  69  const __mmask16 vsign${M}x${ABC[C:C+16]} = _mm512_cmp_ps_mask(vi${M}x${ABC[C:C+16]}, vzero, _CMP_LT_OQ);
  70  const __m512 vacc${M}x${ABC[C:C+16]} = _mm512_mask_mul_ps(vi${M}x${ABC[C:C+16]}, vsign${M}x${ABC[C:C+16]}, vi${M}x${ABC[C:C+16]}, vw${ABC[C:C+16]});
  88  const __mmask16 vsign${M} = _mm512_cmp_ps_mask(vi${M}, vzero, _CMP_LT_OQ);
  89  const __m512 vacc${M} = _mm512_mask_mul_ps(vi${M}, vsign${M}, vi${M}, vw);
 108  const __mmask16 vsign${M} = _mm512_cmp_ps_mask(vi${M}, vzero, _CMP_LT_OQ);
 109  const __m512 vacc${M} = _mm512_mask_mul_ps(vi${M}, vsign${M}, vi${M}, vw);
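The prelu template is the same mask-and-multiply as vlrelu, except the slope is a per-channel weight vector loaded alongside the input rather than a scalar broadcast. A sketch under that reading; the single-row loop and names are illustrative, and the real template unrolls over M rows and wider channel blocks:

    #include <immintrin.h>
    #include <stddef.h>

    /* Illustrative sketch of the prelu pattern for one row. */
    static void prelu_row_f32(const float* x, const float* w, float* y,
                              size_t channels) {
      const __m512 vzero = _mm512_setzero_ps();
      for (size_t c = 0; c + 16 <= channels; c += 16) {
        const __m512 vi = _mm512_loadu_ps(x + c);
        const __m512 vw = _mm512_loadu_ps(w + c);  /* per-channel slopes */
        const __mmask16 vsign = _mm512_cmp_ps_mask(vi, vzero, _CMP_LT_OQ);
        const __m512 vacc = _mm512_mask_mul_ps(vi, vsign, vi, vw);
        _mm512_storeu_ps(y + c, vacc);
      }
    }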
/external/XNNPACK/src/f32-velu/
avx512f-rr1-lut16-p3-perm.c.in
  81  const __mmask16 vsign${N} = _mm512_cmp_ps_mask(vx${N}, vzero, _CMP_NLT_US);
  84  vy${N} = _mm512_mask_mul_ps(vy${N}, vsign${N}, vx${N}, vbeta);
  96  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 114  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 129  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 147  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
avx512f-rr1-p6.c.in
  86  const __mmask16 vsign${N} = _mm512_cmp_ps_mask(vx${N}, vzero, _CMP_NLT_US);
  89  vy${N} = _mm512_mask_mul_ps(vy${N}, vsign${N}, vx${N}, vbeta);
 101  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 120  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
 135  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
 154  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
/external/XNNPACK/src/qs8-dwconv/
unipass-wasmsimd-mul16.c.in
  68  const v128_t vsign${ABC[C:C+4]} = wasm_i32x4_shr(vacc${ABC[C:C+4]}, 31);
  71  const v128_t vacc${ABC[C:C+2]} = wasm_v32x4_shuffle(vacc${ABC[C:C+4]}, vsign${ABC[C:C+4]}, 0, 4, 1, 5);
  72  const v128_t vacc${ABC[C+2:C+4]} = wasm_v32x4_shuffle(vacc${ABC[C:C+4]}, vsign${ABC[C:C+4]}, 2, 6, 3, 7);
 143  const v128_t vsign${ABC[0:4]} = wasm_i32x4_shr(vacc${ABC[0:4]}, 31);
 144  const v128_t vsign${ABC[4:8]} = wasm_i32x4_shr(vacc${ABC[4:8]}, 31);
 146  const v128_t vacc${ABC[0:2]} = wasm_v32x4_shuffle(vacc${ABC[0:4]}, vsign${ABC[0:4]}, 0, 4, 1, 5);
 147  const v128_t vacc${ABC[2:4]} = wasm_v32x4_shuffle(vacc${ABC[0:4]}, vsign${ABC[0:4]}, 2, 6, 3, 7);
 148  const v128_t vacc${ABC[4:6]} = wasm_v32x4_shuffle(vacc${ABC[4:8]}, vsign${ABC[4:8]}, 0, 4, 1, 5);
 149  const v128_t vacc${ABC[6:8]} = wasm_v32x4_shuffle(vacc${ABC[4:8]}, vsign${ABC[4:8]}, 2, 6, 3, 7);
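These dwconv hits are a 32-to-64-bit sign-extension idiom: an arithmetic right shift by 31 turns each 32-bit accumulator lane into its sign word (all ones if negative, all zeros otherwise), and the shuffles interleave value and sign words so each output vector holds two sign-extended 64-bit lanes ready for the kernel's later 64-bit arithmetic. A compact sketch; the wrapper function is illustrative. Compile with clang targeting wasm32 and -msimd128.

    #include <wasm_simd128.h>

    /* Illustrative sketch of the shift-and-shuffle widening idiom. */
    static void widen_i32x4(v128_t vacc, v128_t* out01, v128_t* out23) {
      const v128_t vsign = wasm_i32x4_shr(vacc, 31);  /* per-lane sign words */
      /* Interleave value and sign words: value low, sign high per i64 lane. */
      *out01 = wasm_v32x4_shuffle(vacc, vsign, 0, 4, 1, 5);  /* lanes 0,1 */
      *out23 = wasm_v32x4_shuffle(vacc, vsign, 2, 6, 3, 7);  /* lanes 2,3 */
    }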
/external/XNNPACK/src/qs8-gemm/
MRx4c8-wasmsimd.c.in
 129  const v128_t vsign${M}x0123 = wasm_i32x4_lt(vacc${M}x0123, vzero);
 132  const v128_t vacc${M}x01 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 0, 4, 1, 5);
 138  const v128_t vacc${M}x23 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 2, 6, 3, 7);
/external/XNNPACK/src/qs8-igemm/
MRx4c8-wasmsimd.c.in
 139  const v128_t vsign${M}x0123 = wasm_i32x4_lt(vacc${M}x0123, vzero);
 142  const v128_t vacc${M}x01 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 0, 4, 1, 5);
 148  const v128_t vacc${M}x23 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 2, 6, 3, 7);
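The gemm/igemm templates build the same sign words with a signed compare instead of a shift: wasm_i32x4_lt(vacc, vzero) likewise yields all ones for negative lanes and all zeros otherwise, so the identical interleaving shuffles apply. Illustrative sketch of one widened pair:

    #include <wasm_simd128.h>

    /* Compare-based variant of the widening idiom above. */
    static v128_t widen_low_pair(v128_t vacc) {
      const v128_t vzero = wasm_i32x4_const(0, 0, 0, 0);
      const v128_t vsign = wasm_i32x4_lt(vacc, vzero);  /* sign words */
      return wasm_v32x4_shuffle(vacc, vsign, 0, 4, 1, 5);  /* lanes 0,1 as i64 */
    }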
