Lines Matching refs:ABC
8 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
44 const __m128i vx${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x));
45 const __m128i vy${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y));
47 const __m128i vx${ABC[N:N+4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + ${N}));
48 const __m128i vy${ABC[N:N+4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + ${N}));
54 … __m128i vacc${ABC[N:N+4]} = _mm_macc_epi32(vx${ABC[N:N+4]}, vx_multiplier, vzero_point_product);
57 vacc${ABC[N:N+4]} = _mm_macc_epi32(vy${ABC[N:N+4]}, vy_multiplier, vacc${ABC[N:N+4]});
60 …__m128i vacc${ABC[N:N+4]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[N:N+4]}, vx…
63 …vacc${ABC[N:N+4]} = _mm_add_epi32(vacc${ABC[N:N+4]}, _mm_mullo_epi32(vy${ABC[N:N+4]}, vy_multiplie…
66 …__m128i vrem${ABC[N:N+4]} = _mm_add_epi32(_mm_and_si128(vacc${ABC[N:N+4]}, vremainder_mask), _mm_c…
69 …vacc${ABC[N:N+4]} = _mm_sub_epi32(_mm_sra_epi32(vacc${ABC[N:N+4]}, vshift), _mm_cmpgt_epi32(vrem${…
72 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[N:N+4]}, vacc${ABC[N+4:N+8]})…
75 vout${ABC[N:N+8]} = _mm_max_epi16(vout${ABC[N:N+8]}, voutput_min);
78 vout${ABC[N:N+8]} = _mm_min_epi16(vout${ABC[N:N+8]}, voutput_max);
82 const __m128i vout${ABC[N:N+16]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
84 …const __m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N:N+8]…
87 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
89 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
92 _mm_storeu_si128((__m128i*) (output + ${N}), vout${ABC[N:N+16]});
94 _mm_storel_epi64((__m128i*) (output + ${N}), vout${ABC[N:N+8]}${ABC[N:N+8]});
99 const __m128i vx${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x));
100 const __m128i vy${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y));
101 const __m128i vx${ABC[4:8]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4));
102 const __m128i vy${ABC[4:8]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4));
108 __m128i vacc${ABC[0:4]} = _mm_macc_epi32(vx${ABC[0:4]}, vx_multiplier, vzero_point_product);
109 __m128i vacc${ABC[4:8]} = _mm_macc_epi32(vx${ABC[4:8]}, vx_multiplier, vzero_point_product);
111 vacc${ABC[0:4]} = _mm_macc_epi32(vy${ABC[0:4]}, vy_multiplier, vacc${ABC[0:4]});
112 vacc${ABC[4:8]} = _mm_macc_epi32(vy${ABC[4:8]}, vy_multiplier, vacc${ABC[4:8]});
114 …__m128i vacc${ABC[0:4]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[0:4]}, vx_mul…
115 …__m128i vacc${ABC[4:8]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[4:8]}, vx_mul…
117 … vacc${ABC[0:4]} = _mm_add_epi32(vacc${ABC[0:4]}, _mm_mullo_epi32(vy${ABC[0:4]}, vy_multiplier));
118 … vacc${ABC[4:8]} = _mm_add_epi32(vacc${ABC[4:8]}, _mm_mullo_epi32(vy${ABC[4:8]}, vy_multiplier));
120 …t __m128i vrem${ABC[0:4]} = _mm_add_epi32(_mm_and_si128(vacc${ABC[0:4]}, vremainder_mask), _mm_cmp…
121 …t __m128i vrem${ABC[4:8]} = _mm_add_epi32(_mm_and_si128(vacc${ABC[4:8]}, vremainder_mask), _mm_cmp…
123 …vacc${ABC[0:4]} = _mm_sub_epi32(_mm_sra_epi32(vacc${ABC[0:4]}, vshift), _mm_cmpgt_epi32(vrem${ABC[…
124 …vacc${ABC[4:8]} = _mm_sub_epi32(_mm_sra_epi32(vacc${ABC[4:8]}, vshift), _mm_cmpgt_epi32(vrem${ABC[…
126 …__m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutpu…
127 vout${ABC[0:8]} = _mm_max_epi16(vout${ABC[0:8]}, voutput_min);
128 vout${ABC[0:8]} = _mm_min_epi16(vout${ABC[0:8]}, voutput_max);
130 __m128i vout${ABC[0:8]}${ABC[0:8]} = _mm_packs_epi16(vout${ABC[0:8]}, vout${ABC[0:8]});
134 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
139 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]});
140 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi64(vout${ABC[0:8]}${ABC[0:8]}, 32);
144 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout${ABC[0:8]}${ABC[0:8]}, 0);
145 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi32(vout${ABC[0:8]}${ABC[0:8]}, 16);
150 *output = (int8_t) _mm_extract_epi8(vout${ABC[0:8]}${ABC[0:8]}, 0);
152 *output = (int32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]});
158 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]});
159 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi64(vout${ABC[0:8]}${ABC[0:8]}, 32);
163 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout${ABC[0:8]}${ABC[0:8]}, 0);
164 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi32(vout${ABC[0:8]}${ABC[0:8]}, 16);
169 *output = (int8_t) _mm_extract_epi8(vout${ABC[0:8]}${ABC[0:8]}, 0);
171 *output = (int32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]});