Lines Matching refs:nr_block_offset
35 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemm_goi_w() local
36 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_gemm_goi_w()
42 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemm_goi_w() local
45 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f32_gemm_goi_w()
53 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemm_goi_w() local
56 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_gemm_goi_w()
89 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemm_goi_w() local
90 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_gemm_goi_w()
96 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemm_goi_w() local
99 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f16_gemm_goi_w()
107 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemm_goi_w() local
110 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_gemm_goi_w()
144 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_gemm_goi_w() local
145 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_qu8_gemm_goi_w()
158 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_gemm_goi_w() local
161 …const uint8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qu8_gemm_goi_w()
166 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qu8_gemm_goi_w()
198 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_gemm_goi_w() local
199 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset]; in xnn_pack_qs8_gemm_goi_w()
212 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_gemm_goi_w() local
215 … const int8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qs8_gemm_goi_w()
220 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qs8_gemm_goi_w()
252 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_gemm_xw_goi_w() local
253 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset]; in xnn_pack_qs8_gemm_xw_goi_w()
266 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_gemm_xw_goi_w() local
269 … const int8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qs8_gemm_xw_goi_w()
274 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qs8_gemm_xw_goi_w()
304 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemm_io_w() local
305 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_gemm_io_w()
311 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemm_io_w() local
314 …rt, skr) + ((kr_block_start + nr_block_offset * kr) & sr_mask) + kr_block_offset) * nc + (nr_block… in xnn_pack_f32_gemm_io_w()
322 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemm_io_w() local
325 k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f32_gemm_io_w()
351 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemm_io_w() local
352 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_gemm_io_w()
358 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemm_io_w() local
361 …rt, skr) + ((kr_block_start + nr_block_offset * kr) & sr_mask) + kr_block_offset) * nc + (nr_block… in xnn_pack_f16_gemm_io_w()
369 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemm_io_w() local
372 k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f16_gemm_io_w()
399 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_gemm_io_w() local
400 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_qu8_gemm_io_w()
413 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_gemm_io_w() local
416 …const uint8_t kv = k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_qu8_gemm_io_w()
421 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qu8_gemm_io_w()
449 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_conv_goki_w() local
450 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_conv_goki_w()
457 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_conv_goki_w() local
460 …k[((nr_block_start + nr_block_offset) * ks + ki) * kc + round_down_po2(kr_block_start, skr) + ((kr… in xnn_pack_f32_conv_goki_w()
468 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_conv_goki_w() local
471 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_conv_goki_w()
506 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_conv_goki_w() local
507 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_conv_goki_w()
514 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_conv_goki_w() local
517 …k[((nr_block_start + nr_block_offset) * ks + ki) * kc + round_down_po2(kr_block_start, skr) + ((kr… in xnn_pack_f16_conv_goki_w()
525 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_conv_goki_w() local
528 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_conv_goki_w()
564 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_conv_goki_w() local
565 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_qu8_conv_goki_w()
579 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_conv_goki_w() local
583 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qu8_conv_goki_w()
588 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qu8_conv_goki_w()
622 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_conv_goki_w() local
623 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset]; in xnn_pack_qs8_conv_goki_w()
637 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_conv_goki_w() local
641 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qs8_conv_goki_w()
646 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qs8_conv_goki_w()
675 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_conv_kgo_w() local
676 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_conv_kgo_w()
681 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_conv_kgo_w() local
683 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f32_conv_kgo_w()
711 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_conv_kgo_w() local
712 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_conv_kgo_w()
717 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_conv_kgo_w() local
719 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f16_conv_kgo_w()
750 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_conv_kgo_w() local
751 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_qu8_conv_kgo_w()
763 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_conv_kgo_w() local
765 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_qu8_conv_kgo_w()
767 packed_b[nr_block_offset] -= (int32_t) kv * izp; in xnn_pack_qu8_conv_kgo_w()
797 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_conv_kgo_w() local
798 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset]; in xnn_pack_qs8_conv_kgo_w()
810 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qs8_conv_kgo_w() local
812 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_qs8_conv_kgo_w()
814 packed_b[nr_block_offset] -= (int32_t) kv * izp; in xnn_pack_qs8_conv_kgo_w()
856 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_deconv_goki_w() local
857 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_deconv_goki_w()
864 … for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_deconv_goki_w() local
867 …nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + round_down_po2(kr_block_start, skr)… in xnn_pack_f32_deconv_goki_w()
875 … for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_deconv_goki_w() local
878 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_f32_deconv_goki_w()
925 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_deconv_goki_w() local
926 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_deconv_goki_w()
933 … for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_deconv_goki_w() local
936 …nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + round_down_po2(kr_block_start, skr)… in xnn_pack_f16_deconv_goki_w()
944 … for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_deconv_goki_w() local
947 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_f16_deconv_goki_w()
996 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_deconv_goki_w() local
997 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_qu8_deconv_goki_w()
1012 … for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_qu8_deconv_goki_w() local
1016 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_qu8_deconv_goki_w()
1021 packed_b[nr_block_offset] -= ksum * izp; in xnn_pack_qu8_deconv_goki_w()
1363 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemminc_goi_w() local
1366 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f32_gemminc_goi_w()
1374 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f32_gemminc_goi_w() local
1377 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_gemminc_goi_w()
1407 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemminc_goi_w() local
1410 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f16_gemminc_goi_w()
1418 for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) { in xnn_pack_f16_gemminc_goi_w() local
1421 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_gemminc_goi_w()
1446 for (size_t nr_block_offset = 0; nr_block_offset < nr; nr_block_offset++) { in xnn_pack_f32_dconv_oki_w() local
1447 *packed_w++ = b[min(nr_block_offset, nr_block_size - 1)]; in xnn_pack_f32_dconv_oki_w()
1459 for (size_t nr_block_offset = 0; nr_block_offset < nr; nr_block_offset++) { in xnn_pack_f32_dconv_oki_w() local
1460 …*packed_w++ = k[(((nr_block_start + min(nr_block_offset, nr_block_size - 1)) * kh + ky) * kw + kx)… in xnn_pack_f32_dconv_oki_w()
1485 for (size_t nr_block_offset = 0; nr_block_offset < nr; nr_block_offset++) { in xnn_pack_f16_dconv_oki_w() local
1486 *packed_w++ = b[min(nr_block_offset, nr_block_size - 1)]; in xnn_pack_f16_dconv_oki_w()
1498 for (size_t nr_block_offset = 0; nr_block_offset < nr; nr_block_offset++) { in xnn_pack_f16_dconv_oki_w() local
1499 …*packed_w++ = k[(((nr_block_start + min(nr_block_offset, nr_block_size - 1)) * kh + ky) * kw + kx)… in xnn_pack_f16_dconv_oki_w()