/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-avx2-mul32-ld64-x16.c | 25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
|
D | minmax-avx2-mul32-ld64-x24.c | 25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
|
D | minmax-avx2-mul32-ld64-x32.c | 25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
|
D | minmax-avx2-mul32-ld64-x8.c | 25 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 26 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 27 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 28 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 29 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-avx2-mul32-ld64-x16.c | 25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
|
D | minmax-avx2-mul32-ld64-x24.c | 25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
|
D | minmax-avx2-mul32-ld64-x32.c | 25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
|
D | minmax-avx2-mul32-ld64-x8.c | 25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 26 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 27 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 33 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32( in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-avx2.c | 94 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() 95 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() 108 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() 112 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() 117 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() 122 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() 123 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
|
D | 1x8c8-xw-minmax-avx2.c | 90 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() 91 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() 104 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() 108 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() 113 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() 118 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() 119 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
|
D | 2x8c8-minmax-avx2.c | 115 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() 116 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() 135 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() 141 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() 148 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() 153 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() 154 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
|
D | 2x8c8-xw-minmax-avx2.c | 111 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() 112 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() 131 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() 137 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() 144 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() 149 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() 150 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
|
D | 3x8c8-minmax-avx2.c | 136 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() 137 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() 162 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() 170 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() 179 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() 186 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() 187 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
|
D | 3x8c8-xw-minmax-avx2.c | 132 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() 133 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() 158 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() 166 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() 175 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() 182 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() 183 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-avx2.c | 107 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() 108 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() 121 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() 125 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() 130 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() 135 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() 136 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
|
D | 2x8c8-minmax-avx2.c | 130 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() 131 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() 150 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() 156 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() 163 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() 168 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() 169 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
|
D | 3x8c8-minmax-avx2.c | 153 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() 154 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() 179 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() 187 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() 196 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() 203 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() 204 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-avx2-mul32.c | 170 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 171 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 189 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 195 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 202 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 205 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 206 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 277 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 278 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() 290 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32() [all …]
|
D | up16x9-minmax-avx2-mul16.c | 170 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 171 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 189 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 195 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 202 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 205 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 206 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 293 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 294 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 312 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() [all …]
|
D | up8x9-minmax-avx2-mul32.c | 142 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 143 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 155 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 159 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 227 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 228 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 240 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32() 244 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32()
|
D | up24x9-minmax-avx2-mul32.c | 198 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 199 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 223 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 231 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 240 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 244 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 245 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 319 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 320 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() 332 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32() [all …]
|
/external/XNNPACK/src/qs8-vadd/ |
D | avx2-mul32-ld64.c.in | 24 …const __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) pa… 25 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… 26 …const __m256i vy_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… 27 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… 28 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… 31 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… 32 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 33 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss…
|
/external/XNNPACK/src/qs8-dwconv/ |
D | unipass-avx2-mul32.c.in | 63 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 64 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… 80 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… 85 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… 92 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… 104 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 105 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 159 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 160 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… 172 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… [all …]
|
D | unipass-avx2-mul16.c.in | 66 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 67 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… 83 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… 88 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… 94 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… 98 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 99 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 140 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 141 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… 159 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… [all …]
|
/external/XNNPACK/src/qs8-vaddc/ |
D | avx2-mul32-ld64.c.in | 24 …const __m256i vx_multiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->… 25 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… 26 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… 29 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… 30 …const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 31 …const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… 37 __m256i vzero_point_product = _mm256_broadcastsi128_si256(_mm_add_epi32(
|