
Searched refs:vaddw_s16 (Results 1 – 25 of 42) sorted by relevance
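
For context, vaddw_s16 is the NEON widening-add intrinsic from <arm_neon.h>: int32x4_t vaddw_s16(int32x4_t a, int16x4_t b) sign-extends each int16 lane of b and adds it to the corresponding int32 lane of a. A minimal sketch:

#include <arm_neon.h>

/* acc[i] += (int32_t) x[i] for i = 0..3, with no intermediate narrowing. */
int32x4_t widen_accumulate(int32x4_t acc, int16x4_t x) {
  return vaddw_s16(acc, x);
}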


/external/XNNPACK/src/qs8-igemm/gen/
4x16-minmax-neon-mull-addw-dup.c
105 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
106 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
108 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
109 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
111 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
112 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
114 vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
115 vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
119 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
120 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
[all …]
3x16-minmax-neon-mull-addw-dup.c
92 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
93 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
95 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
96 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
98 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
99 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
103 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
104 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
106 vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
107 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
[all …]
4x8-minmax-neon-mull-addw-dup.c
97 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
98 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
100 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
101 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
103 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
104 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
106 vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
107 vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
111 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
112 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
[all …]
2x16-minmax-neon-mull-addw-dup.c
79 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
80 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
82 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
83 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
87 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
88 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
90 vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
91 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
95 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
96 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
[all …]
3x8-minmax-neon-mull-addw-dup.c
86 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
87 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
89 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
90 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
92 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
93 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
97 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
98 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
100 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
101 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
[all …]
2x8-minmax-neon-mull-addw-dup.c
75 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
76 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
78 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
79 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
83 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
84 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
86 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
87 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
91 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
92 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
[all …]
1x16-minmax-neon-mull-addw-dup.c
66 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
67 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
71 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
72 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
76 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
77 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
81 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc1)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
82 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc1)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
86 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
87 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
[all …]
1x8-minmax-neon-mull-addw-dup.c
64 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
65 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
69 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
70 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
74 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
75 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
79 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c3)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
80 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
84 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c4)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
85 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
4x16-minmax-neon-mull-addw-dup.c
88 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
89 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
91 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
92 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
94 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
95 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
97 vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
98 vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
102 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
103 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
[all …]
3x16-minmax-neon-mull-addw-dup.c
77 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
78 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
80 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
81 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
83 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
84 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
88 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
89 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
91 vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
92 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
[all …]
4x8-minmax-neon-mull-addw-dup.c
80 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
81 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
83 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
84 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
86 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
87 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
89 vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
90 vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
94 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
95 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
[all …]
2x16-minmax-neon-mull-addw-dup.c
66 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
67 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
69 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
70 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
74 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
75 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
77 vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
78 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
82 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
83 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
[all …]
3x8-minmax-neon-mull-addw-dup.c
71 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
72 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
74 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
75 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
77 vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
78 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
82 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
83 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
85 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
86 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
[all …]
2x8-minmax-neon-mull-addw-dup.c
62 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
63 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
65 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
66 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
70 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
71 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
73 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
74 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
78 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
79 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
[all …]
1x16-minmax-neon-mull-addw-dup.c
55 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
56 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
60 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
61 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
65 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
66 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
70 vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc1)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
71 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc1)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
75 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
76 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
[all …]
1x8-minmax-neon-mull-addw-dup.c
53 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
54 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
58 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
59 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
63 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
64 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
68 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c3)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
69 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
73 vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c4)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
74 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
[all …]
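
All of the qs8-gemm and qs8-igemm hits above are the same "mull + addw + dup" accumulation step. A minimal sketch of one such step, assuming 8-lane int8 inputs; the helper name and variables are illustrative, not XNNPACK API:

#include <arm_neon.h>

/* Multiply 8 int8 pairs into int16 products, then widen the low and
 * high halves into two int32x4_t accumulators. In the generated
 * kernels, va is one activation byte broadcast to all lanes (the
 * "dup" in the kernel name). */
static inline void qs8_mull_addw_step(
    int8x8_t va, int8x8_t vb,
    int32x4_t* vacc0123, int32x4_t* vacc4567) {
  const int16x8_t vprod = vmull_s8(va, vb);
  *vacc0123 = vaddw_s16(*vacc0123, vget_low_s16(vprod));
  *vacc4567 = vaddw_s16(*vacc4567, vget_high_s16(vprod));
}
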
/external/XNNPACK/src/qs8-gavgpool/gen/
7p7x-minmax-neon-c32-acc2.c
101 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
102 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
103 const int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
104 const int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
105 const int32x4_t vaccGHIJ = vaddw_s16(vbias, vget_low_s16(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
106 const int32x4_t vaccKLMN = vaddw_s16(vbias, vget_high_s16(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
107 const int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
108 const int32x4_t vaccSTUV = vaddw_s16(vbias, vget_high_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
139 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
140 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
[all …]
7p7x-minmax-neon-c24-acc2.c
88 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
89 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
90 const int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
91 const int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
92 const int32x4_t vaccGHIJ = vaddw_s16(vbias, vget_low_s16(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
93 const int32x4_t vaccKLMN = vaddw_s16(vbias, vget_high_s16(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
122 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
123 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
195 vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
196 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
[all …]
7p7x-minmax-neon-c16-acc2.c
75 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
76 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
77 const int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
78 const int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
134 vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
135 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
136 vacc89AB = vaddw_s16(vacc89AB, vget_low_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
137 vaccCDEF = vaddw_s16(vaccCDEF, vget_high_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
218 vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
219 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
[all …]
7p7x-minmax-neon-c8-acc2.c
62 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
63 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
102 vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
103 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
167 vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
168 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
247 vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
248 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
7x-minmax-neon-c32-acc2.c
124 int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
125 int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
126 int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
127 int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
128 int32x4_t vaccGHIJ = vaddw_s16(vbias, vget_low_s16(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
129 int32x4_t vaccKLMN = vaddw_s16(vbias, vget_high_s16(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
130 int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
131 int32x4_t vaccSTUV = vaddw_s16(vbias, vget_high_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
296 int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
297 int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
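
The qs8-gavgpool hits use vaddw_s16 differently: rather than accumulating into a running total, they seed two int32 accumulators from a shared bias plus a widened int16 row sum. A sketch under the same naming assumptions:

#include <arm_neon.h>

/* Widen an 8-lane int16 row sum into two int32x4_t accumulators,
 * each starting from the same bias vector. */
static inline void gavgpool_init_acc(
    int32x4_t vbias, int16x8_t vsum,
    int32x4_t* vacc0123, int32x4_t* vacc4567) {
  *vacc0123 = vaddw_s16(vbias, vget_low_s16(vsum));
  *vacc4567 = vaddw_s16(vbias, vget_high_s16(vsum));
}
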
/external/XNNPACK/src/qs8-igemm/
neon-mull-addw-dup.c.in
85 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c${…
86 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
100 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c0)…
101 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
110 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c1)…
111 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
120 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c2)…
121 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
130 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c3)…
131 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
[all …]
/external/XNNPACK/src/qs8-gemm/
neon-mull-addw-dup.c.in
77 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c${…
78 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
92 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c0)…
93 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
102 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c1)…
103 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
112 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c2)…
113 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
122 …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c3)…
123 …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
[all …]
/external/XNNPACK/src/qu8-gavgpool/
7p7x-minmax-neon-c8.c
61 const int32x4_t vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
62 const int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
98 vst1q_s32(acc, vaddw_s16(vacc_lo, vget_low_s16(vsum))); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
99 vst1q_s32(acc, vaddw_s16(vacc_hi, vget_high_s16(vsum))); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
159 vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
160 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
231 vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
232 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
/external/XNNPACK/src/qu8-avgpool/
9p8x-minmax-neon-c8.c
114 const int32x4_t vacc_lo = vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
115 const int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
187 vacc_lo = vaddw_s16(vacc_lo, vreinterpret_s16_u16(vget_low_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
188 vacc_hi = vaddw_s16(vacc_hi, vreinterpret_s16_u16(vget_high_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
282 vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
283 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
356 vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
357 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
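
In the qu8-avgpool kernel the row sum is unsigned (uint16x8_t), so each half is reinterpreted as int16x4_t before the signed widening add. That reinterpretation is only valid while every lane stays at or below INT16_MAX, which holds here since a sum of nine uint8 values is at most 2295. A sketch:

#include <arm_neon.h>

/* Reinterpret the low half of an unsigned 16-bit sum as signed and
 * widen-add it onto a bias vector; lanes must not exceed INT16_MAX. */
static inline int32x4_t qu8_widen_low(int32x4_t vbias, uint16x8_t vsum) {
  return vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum)));
}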
