Searched refs:_mm512_madd_epi16 (Results 1 – 16 of 16) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
4x16c8-minmax-avx512skx.c
104 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
105 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
106 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
107 vacc3x0123 = _mm512_add_epi32(vacc3x0123, _mm512_madd_epi16(va3, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
110 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
111 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
112 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
113 vacc3x4567 = _mm512_add_epi32(vacc3x4567, _mm512_madd_epi16(va3, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
116 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
117 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
[all …]
3x16c8-minmax-avx512skx.c
92 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
93 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
94 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
97 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
98 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
99 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
102 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
103 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
104 vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
107 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
[all …]
2x16c8-minmax-avx512skx.c
80 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
81 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
84 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
85 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
88 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
89 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
92 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
93 vacc1xCDEF = _mm512_add_epi32(vacc1xCDEF, _mm512_madd_epi16(va1, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
1x16c8-minmax-avx512skx.c
68 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
71 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
74 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
77 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
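
The hits above all follow one accumulation pattern: sign-extended 16-bit activation and weight vectors are fed to _mm512_madd_epi16, which multiplies adjacent 16-bit pairs and sums each pair into a 32-bit lane, and that result is added into a running int32 accumulator. A minimal sketch of a single step, with hypothetical names (not taken from the XNNPACK sources) and assuming AVX-512BW is available:

#include <immintrin.h>

/* One accumulation step as used in the qs8 GEMM micro-kernels above:
 * va and vb each hold 32 signed 16-bit values; vpmaddwd produces 16
 * signed 32-bit pairwise dot products, which are added into vacc. */
static inline __m512i madd_accumulate(__m512i vacc, __m512i va, __m512i vb) {
  return _mm512_add_epi32(vacc, _mm512_madd_epi16(va, vb));
}
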
/external/XNNPACK/src/qs8-igemm/gen/
4x16c8-minmax-avx512skx.c
119 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
120 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
121 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
122 vacc3x0123 = _mm512_add_epi32(vacc3x0123, _mm512_madd_epi16(va3, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
125 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
126 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
127 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
128 vacc3x4567 = _mm512_add_epi32(vacc3x4567, _mm512_madd_epi16(va3, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
131 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
132 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
[all …]
3x16c8-minmax-avx512skx.c
105 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
106 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
107 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
110 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
111 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
112 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
115 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
116 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
117 vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
120 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
[all …]
2x16c8-minmax-avx512skx.c
91 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
92 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
95 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
96 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
99 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
100 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
103 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
104 vacc1xCDEF = _mm512_add_epi32(vacc1xCDEF, _mm512_madd_epi16(va1, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
1x16c8-minmax-avx512skx.c
77 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
80 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
83 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
86 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
/external/ruy/ruy/
kernel_avx512.cc
325 accum, _mm512_madd_epi16(lhs_16_bit_low, rhs_16_bit_dup_low));
327 accum, _mm512_madd_epi16(lhs_16_bit_high, rhs_16_bit_dup_high));
759 accum_v, _mm512_madd_epi16(lhs_16_bit_low, rhs_16_bit_dup_low));
761 accum_v, _mm512_madd_epi16(lhs_16_bit_high, rhs_16_bit_dup_high));
pack_avx512.cc
253 _mm512_madd_epi16(sums_8x4_16bit, ones_16bit);
404 _mm512_madd_epi16(sums_8x4_16bit, ones_16bit);
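
The pack_avx512.cc hits use _mm512_madd_epi16 against a constant vector of ones: multiplying by 1 turns the instruction into a pairwise horizontal add that widens adjacent signed 16-bit partial sums into 32-bit lanes. A small sketch of that trick, with illustrative names rather than ruy's own:

#include <immintrin.h>

/* Widen adjacent signed 16-bit partial sums into 16 int32 lanes by
 * multiplying each element by 1 and letting vpmaddwd add the pairs. */
static inline __m512i pairwise_widen(__m512i sums_16bit) {
  const __m512i ones_16bit = _mm512_set1_epi16(1);
  return _mm512_madd_epi16(sums_16bit, ones_16bit);
}
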
/external/XNNPACK/src/qs8-gemm/
MRx16c8-avx512skx.c.in
105 …vacc${M}x${ABC[N:N+4]} = _mm512_add_epi32(vacc${M}x${ABC[N:N+4]}, _mm512_madd_epi16(va${M}, vb${AB…
/external/XNNPACK/src/qs8-igemm/
MRx16c8-avx512skx.c.in
111 …vacc${M}x${ABC[N:N+4]} = _mm512_add_epi32(vacc${M}x${ABC[N:N+4]}, _mm512_madd_epi16(va${M}, vb${AB…
/external/llvm-project/clang/lib/Headers/
avx512bwintrin.h
1160 _mm512_madd_epi16(__m512i __A, __m512i __B) { in _mm512_madd_epi16() function
1167 (__v16si)_mm512_madd_epi16(__A, __B), in _mm512_mask_madd_epi16()
1174 (__v16si)_mm512_madd_epi16(__A, __B), in _mm512_maskz_madd_epi16()
/external/clang/test/CodeGen/
avx512bw-builtins.c
933 return _mm512_madd_epi16(__A,__B); in test_mm512_madd_epi16()
/external/llvm-project/clang/test/CodeGen/X86/
avx512bw-builtins.c
1412 return _mm512_madd_epi16(__A,__B); in test_mm512_madd_epi16()
/external/clang/lib/Headers/
avx512bwintrin.h
1310 _mm512_madd_epi16 (__m512i __A, __m512i __B) { in _mm512_madd_epi16() function
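
The header hits above show where the intrinsic is defined, along with the _mm512_mask_madd_epi16 and _mm512_maskz_madd_epi16 variants built on top of it. Its per-lane behaviour can be described with a scalar reference model; the sketch below is illustrative and not taken from those headers:

#include <stdint.h>

/* Reference semantics of _mm512_madd_epi16 (vpmaddwd): each output int32
 * lane is the sum of two adjacent signed 16-bit products, so 32 int16
 * inputs per operand yield 16 int32 results. */
static void madd_epi16_reference(const int16_t a[32], const int16_t b[32],
                                 int32_t out[16]) {
  for (int i = 0; i < 16; ++i) {
    out[i] = (int32_t)a[2 * i] * (int32_t)b[2 * i]
           + (int32_t)a[2 * i + 1] * (int32_t)b[2 * i + 1];
  }
}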