Home
last modified time | relevance | path

Searched refs:_mm256_broadcastsi128_si256 (Results 1 – 25 of 49) sorted by relevance

12

/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-avx2-mul32-ld64-x16.c25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
Dminmax-avx2-mul32-ld64-x24.c25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
Dminmax-avx2-mul32-ld64-x32.c25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
Dminmax-avx2-mul32-ld64-x8.c25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-avx2-mul32-ld64-x16.c25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
Dminmax-avx2-mul32-ld64-x24.c25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
Dminmax-avx2-mul32-ld64-x32.c25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
Dminmax-avx2-mul32-ld64-x8.c25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
/external/XNNPACK/src/qs8-gemm/gen/
D1x8c8-minmax-avx2.c94 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
95 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
108 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
112 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
117 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
122 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
123 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
D1x8c8-xw-minmax-avx2.c90 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
91 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
104 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
108 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
113 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
118 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
119 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
D2x8c8-minmax-avx2.c115 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
116 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
135 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
141 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
148 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
153 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
154 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
D2x8c8-xw-minmax-avx2.c111 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
112 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
131 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
137 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
144 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
149 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
150 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
D3x8c8-minmax-avx2.c136 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
137 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
162 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
170 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
179 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
186 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
187 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
D3x8c8-xw-minmax-avx2.c132 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
133 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
158 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
166 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
175 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
182 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
183 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c8-minmax-avx2.c107 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
108 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
121 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
125 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
130 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
135 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
136 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
D2x8c8-minmax-avx2.c130 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
131 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
150 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
156 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
163 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
168 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
169 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
D3x8c8-minmax-avx2.c153 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
154 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
179 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
187 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
196 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
203 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
204 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup16x9-minmax-avx2-mul32.c170 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
171 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
189 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
195 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
202 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
205 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
206 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
277 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
278 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
290 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32()
[all …]
Dup16x9-minmax-avx2-mul16.c170 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
171 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
189 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
195 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
202 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
205 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
206 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
293 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
294 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
312 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
[all …]
Dup8x9-minmax-avx2-mul32.c142 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
143 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
155 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
159 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
227 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
228 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
240 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
244 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
Dup24x9-minmax-avx2-mul32.c198 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
199 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
223 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
231 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
240 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
244 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
245 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
319 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
320 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
332 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32()
[all …]
/external/XNNPACK/src/qs8-vadd/
Davx2-mul32-ld64.c.in24 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa…
25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->…
26 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->…
27 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params…
28 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p…
31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par…
32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
/external/XNNPACK/src/qs8-dwconv/
Dunipass-avx2-mul32.c.in63 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
64 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2…
80 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params…
85 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p…
92 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par…
104 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
105 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
159 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
160 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2…
172 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params…
[all …]
Dunipass-avx2-mul16.c.in66 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
67 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2…
83 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params…
88 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p…
94 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par…
98 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
99 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
140 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
141 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2…
159 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params…
[all …]
/external/XNNPACK/src/qs8-vaddc/
Davx2-mul32-ld64.c.in24 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->…
25 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params…
26 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p…
29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par…
30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
37 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32(

12