/external/XNNPACK/src/f32-vlrelu/
D | avx512f.c.in
    37   … const __mmask16 vsign${ABC[N:N+16]} = _mm512_cmp_ps_mask(vacc${ABC[N:N+16]}, vzero, _CMP_LT_OQ);
    40   …vacc${ABC[N:N+16]} = _mm512_mask_mul_ps(vacc${ABC[N:N+16]}, vsign${ABC[N:N+16]}, vacc${ABC[N:N+16]…
    51   const __mmask16 vsign = _mm512_cmp_ps_mask(vacc, vzero, _CMP_LT_OQ);  variable
    52   vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
    64   const __mmask16 vsign = _mm512_mask_cmp_ps_mask(vmask, vacc, vzero, _CMP_LT_OQ);  variable
    65   vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
/external/XNNPACK/src/f32-vlrelu/gen/
D | vlrelu-avx512f-x32.c
    49   const __mmask16 vsign = _mm512_cmp_ps_mask(vacc, vzero, _CMP_LT_OQ);  in xnn_f32_vlrelu_ukernel__avx512f_x32() local
    50   vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x32()
    62   const __mmask16 vsign = _mm512_mask_cmp_ps_mask(vmask, vacc, vzero, _CMP_LT_OQ);  in xnn_f32_vlrelu_ukernel__avx512f_x32() local
    63   vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x32()
D | vlrelu-avx512f-x16.c
    50   const __mmask16 vsign = _mm512_mask_cmp_ps_mask(vmask, vacc, vzero, _CMP_LT_OQ);  in xnn_f32_vlrelu_ukernel__avx512f_x16() local
    51   vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x16()
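All of the f32-vlrelu matches above are the same two-instruction idiom: a compare against zero that produces a 16-bit lane mask, followed by a masked multiply that rescales only the negative lanes. Below is a minimal standalone sketch of that step; the function name, the hard-coded slope, and the absence of a remainder path are illustrative assumptions, not XNNPACK's actual API.

// Sketch only: Leaky ReLU over a multiple of 16 floats using the
// compare-mask + masked-multiply pair from the kernels listed above.
// Build with AVX-512F enabled (e.g. -mavx512f).
#include <immintrin.h>
#include <stddef.h>

void leaky_relu_f32_avx512f_sketch(const float* input, float* output, size_t n) {
  const __m512 vslope = _mm512_set1_ps(0.01f);   // assumed slope, normally a kernel parameter
  const __m512 vzero = _mm512_setzero_ps();
  for (size_t i = 0; i + 16 <= n; i += 16) {
    __m512 vacc = _mm512_loadu_ps(input + i);
    // Mask bit k is set when lane k is strictly negative (ordered, non-signaling compare).
    const __mmask16 vsign = _mm512_cmp_ps_mask(vacc, vzero, _CMP_LT_OQ);
    // Masked multiply: negative lanes become vacc * vslope, all other lanes keep vacc
    // because the first operand is passed through where the mask bit is 0.
    vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);
    _mm512_storeu_ps(output + i, vacc);
  }
}

In the remainder paths shown above (lines 62-65 of the x32 kernel and 50-51 of the x16 kernel), the compare is issued as _mm512_mask_cmp_ps_mask(vmask, …), which ANDs the comparison result with the load mask so lanes beyond the end of the row are never rewritten.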
/external/XNNPACK/src/f32-velu/gen/
D | velu-avx512f-rr1-lut16-p3-perm-x16.c
    47   const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() local
    65   vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
    80   const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() local
    98   vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
D | velu-avx512f-rr1-p6-x16.c
    47   const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16() local
    66   vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16()
    81   const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16() local
    100  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16()
D | velu-avx512f-rr1-lut16-p3-perm-x32.c
    98   const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() local
    116  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
    131  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() local
    149  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
D | velu-avx512f-rr1-p6-x32.c
    102  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32() local
    121  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32()
    136  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32() local
    155  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32()
D | velu-avx512f-rr1-lut16-p3-perm-x48.c
    115  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() local
    133  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
    148  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() local
    166  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
D | velu-avx512f-rr1-p6-x48.c
    120  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48() local
    139  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48()
    154  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48() local
    173  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48()
D | velu-avx512f-rr1-p6-x64.c
    138  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() local
    157  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64()
    172  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() local
    191  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64()
D | velu-avx512f-rr1-lut16-p3-perm-x64.c
    132  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() local
    150  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
    165  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() local
    183  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
D | velu-avx512f-rr1-lut16-p3-perm-x80.c
    149  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() local
    167  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
    182  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() local
    200  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
D | velu-avx512f-rr1-p6-x80.c
    156  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() local
    175  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
    190  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() local
    209  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
D | velu-avx512f-rr1-lut16-p3-perm-x96.c
    166  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() local
    184  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
    199  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() local
    217  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
D | velu-avx512f-rr1-p6-x96.c
    174  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() local
    193  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
    208  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() local
    227  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
D | velu-avx512f-rr1-lut16-p3-perm-x112.c
    183  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() local
    201  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
    216  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() local
    234  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
D | velu-avx512f-rr1-p6-x112.c
    192  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() local
    211  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112()
    226  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() local
    245  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112()
D | velu-avx512f-rr1-lut16-p3-perm-x128.c
    200  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() local
    218  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
    233  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() local
    251  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
D | velu-avx512f-rr1-p6-x128.c
    210  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() local
    229  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128()
    244  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() local
    263  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);  in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128()
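In every f32-velu kernel listed above the mask plays a slightly different role: it is taken with _CMP_NLT_US, so it is set for lanes where vx >= 0 (and for NaN lanes, which compare unordered), and the final _mm512_mask_mul_ps overwrites exactly those lanes with beta * vx while the negative lanes keep the alpha * (exp(x) - 1) value computed in between. Below is a rough sketch of that blend; the scalar expm1f loop is only a stand-in for the kernels' rr1-p6 / rr1-lut16-p3 approximations, and the input prescaling and saturation the real kernels apply are omitted.

// Sketch only: ELU blend structure from the kernels above. Build with
// -mavx512f and link with -lm; n is assumed to be a multiple of 16.
#include <immintrin.h>
#include <math.h>
#include <stddef.h>

void elu_f32_avx512f_sketch(const float* input, float* output, size_t n,
                            float alpha, float beta) {
  const __m512 vbeta = _mm512_set1_ps(beta);
  for (size_t i = 0; i + 16 <= n; i += 16) {
    const __m512 vx = _mm512_loadu_ps(input + i);
    // Lanes taking the linear branch: x >= 0, plus NaN lanes (unordered compare).
    const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
    // Negative branch computed for all lanes: alpha * (exp(x) - 1).
    // Scalar expm1f stands in for the vectorized polynomial/LUT approximation.
    float buf[16];
    _mm512_storeu_ps(buf, vx);
    for (int k = 0; k < 16; k++) {
      buf[k] = alpha * expm1f(buf[k]);
    }
    __m512 vy = _mm512_loadu_ps(buf);
    // One masked multiply both finishes the linear branch (vx * vbeta) and
    // blends it over the masked lanes; unmasked lanes keep the expm1 result.
    vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
    _mm512_storeu_ps(output + i, vy);
  }
}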
/external/XNNPACK/src/f32-prelu/
D | avx512f.c.in
    69   …const __mmask16 vsign${M}x${ABC[C:C+16]} = _mm512_cmp_ps_mask(vi${M}x${ABC[C:C+16]}, vzero, _CMP_L…
    70   …const __m512 vacc${M}x${ABC[C:C+16]} = _mm512_mask_mul_ps(vi${M}x${ABC[C:C+16]}, vsign${M}x${ABC[C…
    88   const __mmask16 vsign${M} = _mm512_cmp_ps_mask(vi${M}, vzero, _CMP_LT_OQ);
    89   const __m512 vacc${M} = _mm512_mask_mul_ps(vi${M}, vsign${M}, vi${M}, vw);
    108  const __mmask16 vsign${M} = _mm512_cmp_ps_mask(vi${M}, vzero, _CMP_LT_OQ);
    109  const __m512 vacc${M} = _mm512_mask_mul_ps(vi${M}, vsign${M}, vi${M}, vw);
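The f32-prelu template uses the same compare + masked-multiply pair as the LReLU kernels, except the slope vw is a per-channel weight vector loaded from the packed weights and shared across the ${M} rows processed per iteration. A hedged sketch of that layout follows; the function name, row-major input layout, and channels-multiple-of-16 assumption are illustrative, not XNNPACK's API.

// Sketch only: per-channel PReLU with the per-row structure of the template above.
#include <immintrin.h>
#include <stddef.h>

void prelu_f32_avx512f_sketch(const float* input, const float* slopes,
                              float* output, size_t rows, size_t channels) {
  const __m512 vzero = _mm512_setzero_ps();
  for (size_t m = 0; m < rows; m++) {
    for (size_t c = 0; c + 16 <= channels; c += 16) {
      const __m512 vw = _mm512_loadu_ps(slopes + c);  // per-channel slopes, reused by every row
      const __m512 vi = _mm512_loadu_ps(input + m * channels + c);
      const __mmask16 vsign = _mm512_cmp_ps_mask(vi, vzero, _CMP_LT_OQ);
      // Negative lanes become vi * vw; non-negative lanes pass through unchanged.
      const __m512 vacc = _mm512_mask_mul_ps(vi, vsign, vi, vw);
      _mm512_storeu_ps(output + m * channels + c, vacc);
    }
  }
}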
/external/XNNPACK/src/f32-velu/
D | avx512f-rr1-lut16-p3-perm.c.in
    81   const __mmask16 vsign${N} = _mm512_cmp_ps_mask(vx${N}, vzero, _CMP_NLT_US);
    84   vy${N} = _mm512_mask_mul_ps(vy${N}, vsign${N}, vx${N}, vbeta);
    96   const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  variable
    114  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
    129  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  variable
    147  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
D | avx512f-rr1-p6.c.in
    86   const __mmask16 vsign${N} = _mm512_cmp_ps_mask(vx${N}, vzero, _CMP_NLT_US);
    89   vy${N} = _mm512_mask_mul_ps(vy${N}, vsign${N}, vx${N}, vbeta);
    101  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  variable
    120  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
    135  const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);  variable
    154  vy = _mm512_mask_mul_ps(vy, vsign, vx, vbeta);
/external/XNNPACK/src/qs8-dwconv/
D | unipass-wasmsimd-mul16.c.in
    68   const v128_t vsign${ABC[C:C+4]} = wasm_i32x4_shr(vacc${ABC[C:C+4]}, 31);
    71   …const v128_t vacc${ABC[C:C+2]} = wasm_v32x4_shuffle(vacc${ABC[C:C+4]}, vsign${ABC[C:C+4]}, 0, 4, 1…
    72   …const v128_t vacc${ABC[C+2:C+4]} = wasm_v32x4_shuffle(vacc${ABC[C:C+4]}, vsign${ABC[C:C+4]}, 2, 6,…
    143  const v128_t vsign${ABC[0:4]} = wasm_i32x4_shr(vacc${ABC[0:4]}, 31);
    144  const v128_t vsign${ABC[4:8]} = wasm_i32x4_shr(vacc${ABC[4:8]}, 31);
    146  … const v128_t vacc${ABC[0:2]} = wasm_v32x4_shuffle(vacc${ABC[0:4]}, vsign${ABC[0:4]}, 0, 4, 1, 5);
    147  … const v128_t vacc${ABC[2:4]} = wasm_v32x4_shuffle(vacc${ABC[0:4]}, vsign${ABC[0:4]}, 2, 6, 3, 7);
    148  … const v128_t vacc${ABC[4:6]} = wasm_v32x4_shuffle(vacc${ABC[4:8]}, vsign${ABC[4:8]}, 0, 4, 1, 5);
    149  … const v128_t vacc${ABC[6:8]} = wasm_v32x4_shuffle(vacc${ABC[4:8]}, vsign${ABC[4:8]}, 2, 6, 3, 7);
/external/XNNPACK/src/qs8-gemm/
D | MRx4c8-wasmsimd.c.in
    129  const v128_t vsign${M}x0123 = wasm_i32x4_lt(vacc${M}x0123, vzero);
    132  const v128_t vacc${M}x01 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 0, 4, 1, 5);
    138  const v128_t vacc${M}x23 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 2, 6, 3, 7);
/external/XNNPACK/src/qs8-igemm/
D | MRx4c8-wasmsimd.c.in
    139  const v128_t vsign${M}x0123 = wasm_i32x4_lt(vacc${M}x0123, vzero);
    142  const v128_t vacc${M}x01 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 0, 4, 1, 5);
    148  const v128_t vacc${M}x23 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 2, 6, 3, 7);
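The qs8 WAsm SIMD matches serve a different purpose from the AVX-512 ones: vsign here is a per-lane sign word (all zeros or all ones) used to widen signed 32-bit accumulators to 64 bits for the fixed-point requantization arithmetic that follows. The dwconv template derives it with an arithmetic shift by 31, while the gemm/igemm templates use wasm_i32x4_lt(vacc, vzero); either way the two shuffles interleave value and sign words into sign-extended 64-bit lanes. A small sketch is below, assuming clang targeting wasm32 with -msimd128; the helper name and pair struct are hypothetical.

// Sketch only: sign-extend four i32 accumulator lanes into two v128 vectors
// of i64 values, the way the qs8 kernels above do before requantization.
#include <wasm_simd128.h>

typedef struct { v128_t lo, hi; } i64x2_pair_t;  // hypothetical helper type

i64x2_pair_t widen_i32x4_to_i64_sketch(v128_t vacc0123) {
  // Per-lane sign word: 0x00000000 for non-negative lanes, 0xFFFFFFFF for negative ones.
  const v128_t vsign0123 = wasm_i32x4_shr(vacc0123, 31);
  i64x2_pair_t out;
  // Interleave value and sign words; with WebAssembly's little-endian lane order,
  // {acc0, sign0, acc1, sign1} reads back as two sign-extended 64-bit integers.
  out.lo = wasm_v32x4_shuffle(vacc0123, vsign0123, 0, 4, 1, 5);
  out.hi = wasm_v32x4_shuffle(vacc0123, vsign0123, 2, 6, 3, 7);
  return out;
}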