Lines Matching refs:__m128i

40   const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
46 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
48 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
49 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
50 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
52 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
53 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
54 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
56 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
57 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
58 const __m128i vi3xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i3 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
60 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
61 const __m128i vi4x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i4 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
62 const __m128i vi4xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i4 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
64 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
65 const __m128i vi5x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i5 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
66 const __m128i vi5xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i5 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
68 const __m128i vi6x01234567 = _mm_loadl_epi64((const __m128i*) i6); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
69 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
70 const __m128i vi6xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i6 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
73 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
74 …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
75 …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
76 …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
77 …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
78 …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
79 …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
80 …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
81 …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
82 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
83 …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
84 …const __m128i vxi3xGHIJKLMN = _mm_unpacklo_epi8(vi3xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
85 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
86 …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
87 …const __m128i vxi4xGHIJKLMN = _mm_unpacklo_epi8(vi4xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
88 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
89 …const __m128i vxi5x89ABCDEF = _mm_unpacklo_epi8(vi5x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
90 …const __m128i vxi5xGHIJKLMN = _mm_unpacklo_epi8(vi5xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
91 …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
92 …const __m128i vxi6x89ABCDEF = _mm_unpacklo_epi8(vi6x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
93 …const __m128i vxi6xGHIJKLMN = _mm_unpacklo_epi8(vi6xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
95 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
96 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
97 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
98 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
99 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
100 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
117 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
118 …const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
119 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
120 const __m128i vsgnacc0x89ABCDEF = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
121 …const __m128i vacc89AB = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x89ABCDEF, vsgnacc0x89ABCDEF… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
122 …const __m128i vaccCDEF = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x89ABCDEF, vsgnacc0x89ABCDEF… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
123 const __m128i vsgnacc0xGHIJKLMN = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
124 …const __m128i vaccGHIJ = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0xGHIJKLMN, vsgnacc0xGHIJKLMN… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
125 …const __m128i vaccKLMN = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0xGHIJKLMN, vsgnacc0xGHIJKLMN… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
127 _mm_store_si128((__m128i*) b, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
128 _mm_store_si128((__m128i*) (b + 4), vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
129 _mm_store_si128((__m128i*) (b + 8), vacc89AB); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
130 _mm_store_si128((__m128i*) (b + 12), vaccCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
131 _mm_store_si128((__m128i*) (b + 16), vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
132 _mm_store_si128((__m128i*) (b + 20), vaccKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
137 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
139 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
141 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
143 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
145 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
147 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
149 const __m128i vi6x01234567 = _mm_loadl_epi64((const __m128i*) i6); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
152 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
153 …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
154 …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
155 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
156 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
157 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
158 …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
160 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
161 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
170 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
171 …const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
172 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
174 _mm_store_si128((__m128i*) b, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
175 _mm_store_si128((__m128i*) (b + 4), vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
194 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
195 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
196 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
198 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
199 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
200 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
202 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
203 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
204 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
206 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
207 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
208 const __m128i vi3xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i3 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
210 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
211 const __m128i vi4x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i4 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
212 const __m128i vi4xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i4 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
214 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
215 const __m128i vi5x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i5 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
216 const __m128i vi5xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i5 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
218 const __m128i vi6x01234567 = _mm_loadl_epi64((const __m128i*) i6); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
219 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
220 const __m128i vi6xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i6 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
223 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
224 …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
225 …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
226 …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
227 …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
228 …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
229 …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
230 …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
231 …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
232 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
233 …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
234 …const __m128i vxi3xGHIJKLMN = _mm_unpacklo_epi8(vi3xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
235 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
236 …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
237 …const __m128i vxi4xGHIJKLMN = _mm_unpacklo_epi8(vi4xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
238 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
239 …const __m128i vxi5x89ABCDEF = _mm_unpacklo_epi8(vi5x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
240 …const __m128i vxi5xGHIJKLMN = _mm_unpacklo_epi8(vi5xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
241 …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
242 …const __m128i vxi6x89ABCDEF = _mm_unpacklo_epi8(vi6x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
243 …const __m128i vxi6xGHIJKLMN = _mm_unpacklo_epi8(vi6xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
245 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
246 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
247 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
248 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
249 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
250 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
267 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
268 …const __m128i vacc0123 = _mm_add_epi32(_mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
269 …const __m128i vacc4567 = _mm_add_epi32(_mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
270 const __m128i vsgnacc0x89ABCDEF = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
271 …const __m128i vacc89AB = _mm_add_epi32(_mm_unpacklo_epi16(vacc0x89ABCDEF, vsgnacc0x89ABCDEF), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
272 …const __m128i vaccCDEF = _mm_add_epi32(_mm_unpackhi_epi16(vacc0x89ABCDEF, vsgnacc0x89ABCDEF), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
273 const __m128i vsgnacc0xGHIJKLMN = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
274 …const __m128i vaccGHIJ = _mm_add_epi32(_mm_unpacklo_epi16(vacc0xGHIJKLMN, vsgnacc0xGHIJKLMN), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
275 …const __m128i vaccKLMN = _mm_add_epi32(_mm_unpackhi_epi16(vacc0xGHIJKLMN, vsgnacc0xGHIJKLMN), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
277 _mm_store_si128((__m128i*) b, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
278 _mm_store_si128((__m128i*) (b + 4), vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
279 _mm_store_si128((__m128i*) (b + 8), vacc89AB); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
280 _mm_store_si128((__m128i*) (b + 12), vaccCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
281 _mm_store_si128((__m128i*) (b + 16), vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
282 _mm_store_si128((__m128i*) (b + 20), vaccKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
287 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
289 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
291 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
293 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
295 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
297 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
299 const __m128i vi6x01234567 = _mm_loadl_epi64((const __m128i*) i6); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
302 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
303 …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
304 …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
305 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
306 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
307 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
308 …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
310 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
311 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
320 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
321 …const __m128i vacc0123 = _mm_add_epi32(_mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
322 …const __m128i vacc4567 = _mm_add_epi32(_mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
324 _mm_store_si128((__m128i*) b, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
325 _mm_store_si128((__m128i*) (b + 4), vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
359 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
360 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
361 const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
363 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
364 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
365 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
367 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
368 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
369 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
371 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
372 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
373 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
375 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
376 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
377 const __m128i vi3xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i3 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
379 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
380 const __m128i vi4x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i4 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
381 const __m128i vi4xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i4 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
383 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
384 const __m128i vi5x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i5 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
385 const __m128i vi5xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i5 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
387 const __m128i vi6x01234567 = _mm_loadl_epi64((const __m128i*) i6); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
388 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
389 const __m128i vi6xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i6 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
392 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
393 …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
394 …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
395 …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
396 …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
397 …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
398 …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
399 …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
400 …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
401 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
402 …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
403 …const __m128i vxi3xGHIJKLMN = _mm_unpacklo_epi8(vi3xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
404 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
405 …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
406 …const __m128i vxi4xGHIJKLMN = _mm_unpacklo_epi8(vi4xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
407 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
408 …const __m128i vxi5x89ABCDEF = _mm_unpacklo_epi8(vi5x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
409 …const __m128i vxi5xGHIJKLMN = _mm_unpacklo_epi8(vi5xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
410 …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
411 …const __m128i vxi6x89ABCDEF = _mm_unpacklo_epi8(vi6x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
412 …const __m128i vxi6xGHIJKLMN = _mm_unpacklo_epi8(vi6xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
414 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
415 __m128i vacc0x89ABCDEF = _mm_add_epi16(vxi0x89ABCDEF, vxi1x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
416 __m128i vacc0xGHIJKLMN = _mm_add_epi16(vxi0xGHIJKLMN, vxi1xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
417 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
418 __m128i vacc1x89ABCDEF = _mm_add_epi16(vxi2x89ABCDEF, vxi3x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
419 __m128i vacc1xGHIJKLMN = _mm_add_epi16(vxi2xGHIJKLMN, vxi3xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
436 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
437 …const __m128i vacc0123 = _mm_add_epi32(_mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
438 …const __m128i vacc4567 = _mm_add_epi32(_mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
439 const __m128i vsgnacc0x89ABCDEF = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
440 …const __m128i vacc89AB = _mm_add_epi32(_mm_unpacklo_epi16(vacc0x89ABCDEF, vsgnacc0x89ABCDEF), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
441 …const __m128i vaccCDEF = _mm_add_epi32(_mm_unpackhi_epi16(vacc0x89ABCDEF, vsgnacc0x89ABCDEF), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
442 const __m128i vsgnacc0xGHIJKLMN = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0xGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
443 …const __m128i vaccGHIJ = _mm_add_epi32(_mm_unpacklo_epi16(vacc0xGHIJKLMN, vsgnacc0xGHIJKLMN), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
444 …const __m128i vaccKLMN = _mm_add_epi32(_mm_unpackhi_epi16(vacc0xGHIJKLMN, vsgnacc0xGHIJKLMN), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
447 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
448 const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
449 const __m128i vabsacc89AB = _mm_abs_epi32(vacc89AB); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
450 const __m128i vabsaccCDEF = _mm_abs_epi32(vaccCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
451 const __m128i vabsaccGHIJ = _mm_abs_epi32(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
452 const __m128i vabsaccKLMN = _mm_abs_epi32(vaccKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
454 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
455 const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
456 const __m128i vabsacc9B = _mm_shuffle_epi32(vabsacc89AB, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
457 const __m128i vabsaccDF = _mm_shuffle_epi32(vabsaccCDEF, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
458 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
459 const __m128i vabsaccLN = _mm_shuffle_epi32(vabsaccKLMN, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
461 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
462 const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
463 const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
464 const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
465 const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
466 const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
467 const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
468 const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
469 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
470 const __m128i vabsprodHJ = _mm_mul_epu32(vabsaccHJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
471 const __m128i vabsprodKM = _mm_mul_epu32(vabsaccKLMN, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
472 const __m128i vabsprodLN = _mm_mul_epu32(vabsaccLN, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
474 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
475 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
476 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
477 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
478 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
479 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
480 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
481 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
482 const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
483 const __m128i vabsoutHJ = _mm_srl_epi64(_mm_add_epi64(vabsprodHJ, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
484 const __m128i vabsoutKM = _mm_srl_epi64(_mm_add_epi64(vabsprodKM, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
485 const __m128i vabsoutLN = _mm_srl_epi64(_mm_add_epi64(vabsprodLN, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
487 const __m128i vabsout0213 = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
489 const __m128i vabsout4657 = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
491 const __m128i vabsout8A9B = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
493 const __m128i vabsoutCEDF = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
495 const __m128i vabsoutGIHJ = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
497 const __m128i vabsoutKMLN = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
500 const __m128i vabsout0123 = _mm_shuffle_epi32(vabsout0213, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
501 const __m128i vabsout4567 = _mm_shuffle_epi32(vabsout4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
502 const __m128i vabsout89AB = _mm_shuffle_epi32(vabsout8A9B, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
503 const __m128i vabsoutCDEF = _mm_shuffle_epi32(vabsoutCEDF, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
504 const __m128i vabsoutGHIJ = _mm_shuffle_epi32(vabsoutGIHJ, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
505 const __m128i vabsoutKLMN = _mm_shuffle_epi32(vabsoutKMLN, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
507 const __m128i vout0123 = _mm_sign_epi32(vabsout0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
508 const __m128i vout4567 = _mm_sign_epi32(vabsout4567, vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
509 const __m128i vout89AB = _mm_sign_epi32(vabsout89AB, vacc89AB); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
510 const __m128i voutCDEF = _mm_sign_epi32(vabsoutCDEF, vaccCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
511 const __m128i voutGHIJ = _mm_sign_epi32(vabsoutGHIJ, vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
512 const __m128i voutKLMN = _mm_sign_epi32(vabsoutKLMN, vaccKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
514 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
515 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vout0123, vout4567), voutput_zero_point); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
516 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vout89AB, voutCDEF), voutput_zero_point); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
517 __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(voutGHIJ, voutKLMN), voutput_zero_point); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
519 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
520 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
525 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
526 __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
528 _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
529 _mm_storel_epi64((__m128i*) (output + 16), voutGHIJKLMNGHIJKLMN); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
536 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
538 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
540 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
542 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
544 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
546 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
548 const __m128i vi6x01234567 = _mm_loadl_epi64((const __m128i*) i6); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
551 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
552 …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
553 …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
554 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
555 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
556 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
557 …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
559 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
560 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
569 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
570 …const __m128i vacc0123 = _mm_add_epi32(_mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
571 …const __m128i vacc4567 = _mm_add_epi32(_mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567), _mm_… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
574 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
575 const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
577 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
578 const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
580 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
581 const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
582 const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
583 const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
585 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
586 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
587 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
588 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
590 const __m128i vabsout0213 = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
592 const __m128i vabsout4657 = _mm_castps_si128( in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
595 const __m128i vabsout0123 = _mm_shuffle_epi32(vabsout0213, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
596 const __m128i vabsout4567 = _mm_shuffle_epi32(vabsout4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
598 const __m128i vout0123 = _mm_sign_epi32(vabsout0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
599 const __m128i vout4567 = _mm_sign_epi32(vabsout4567, vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
601 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
602 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vout0123, vout4567), voutput_zero_point); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
604 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
605 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
608 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
611 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()