/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c8-minmax-avx512skx.c | 104 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 105 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 106 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 107 vacc3x0123 = _mm512_add_epi32(vacc3x0123, _mm512_madd_epi16(va3, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 110 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 111 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 112 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 113 vacc3x4567 = _mm512_add_epi32(vacc3x4567, _mm512_madd_epi16(va3, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 116 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() 117 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx() [all …]
|
D | 3x16c8-minmax-avx512skx.c | 92 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 93 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 94 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 97 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 98 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 99 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 102 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 103 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 104 vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() 107 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx() [all …]
|
D | 2x16c8-minmax-avx512skx.c | 80 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 81 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 84 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 85 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 88 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 89 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 92 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx() 93 vacc1xCDEF = _mm512_add_epi32(vacc1xCDEF, _mm512_madd_epi16(va1, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
|
D | 1x16c8-minmax-avx512skx.c | 68 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx() 71 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx() 74 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx() 77 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c8-minmax-avx512skx.c | 119 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 120 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 121 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 122 vacc3x0123 = _mm512_add_epi32(vacc3x0123, _mm512_madd_epi16(va3, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 125 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 126 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 127 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 128 vacc3x4567 = _mm512_add_epi32(vacc3x4567, _mm512_madd_epi16(va3, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 131 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() 132 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx() [all …]
|
D | 3x16c8-minmax-avx512skx.c | 105 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 106 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 107 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 110 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 111 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 112 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 115 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 116 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 117 vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() 120 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx() [all …]
|
D | 2x16c8-minmax-avx512skx.c | 91 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 92 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 95 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 96 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 99 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 100 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 103 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx() 104 vacc1xCDEF = _mm512_add_epi32(vacc1xCDEF, _mm512_madd_epi16(va1, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
|
D | 1x16c8-minmax-avx512skx.c | 77 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx() 80 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx() 83 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx() 86 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
|
/external/ruy/ruy/ |
D | kernel_avx512.cc | 325 accum, _mm512_madd_epi16(lhs_16_bit_low, rhs_16_bit_dup_low)); 327 accum, _mm512_madd_epi16(lhs_16_bit_high, rhs_16_bit_dup_high)); 759 accum_v, _mm512_madd_epi16(lhs_16_bit_low, rhs_16_bit_dup_low)); 761 accum_v, _mm512_madd_epi16(lhs_16_bit_high, rhs_16_bit_dup_high));
|
D | pack_avx512.cc | 253 _mm512_madd_epi16(sums_8x4_16bit, ones_16bit); 404 _mm512_madd_epi16(sums_8x4_16bit, ones_16bit);
|
/external/XNNPACK/src/qs8-gemm/ |
D | MRx16c8-avx512skx.c.in | 105 …vacc${M}x${ABC[N:N+4]} = _mm512_add_epi32(vacc${M}x${ABC[N:N+4]}, _mm512_madd_epi16(va${M}, vb${AB…
|
/external/XNNPACK/src/qs8-igemm/ |
D | MRx16c8-avx512skx.c.in | 111 …vacc${M}x${ABC[N:N+4]} = _mm512_add_epi32(vacc${M}x${ABC[N:N+4]}, _mm512_madd_epi16(va${M}, vb${AB…
|
/external/llvm-project/clang/lib/Headers/ |
D | avx512bwintrin.h | 1160 _mm512_madd_epi16(__m512i __A, __m512i __B) { in _mm512_madd_epi16() function 1167 (__v16si)_mm512_madd_epi16(__A, __B), in _mm512_mask_madd_epi16() 1174 (__v16si)_mm512_madd_epi16(__A, __B), in _mm512_maskz_madd_epi16()
|
/external/clang/test/CodeGen/ |
D | avx512bw-builtins.c | 933 return _mm512_madd_epi16(__A,__B); in test_mm512_madd_epi16()
|
/external/llvm-project/clang/test/CodeGen/X86/ |
D | avx512bw-builtins.c | 1412 return _mm512_madd_epi16(__A,__B); in test_mm512_madd_epi16()
|
/external/clang/lib/Headers/ |
D | avx512bwintrin.h | 1310 _mm512_madd_epi16 (__m512i __A, __m512i __B) { in _mm512_madd_epi16() function
|