/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_
#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_

#include <stdio.h>
#include <stdlib.h>

#include "config/aom_config.h"

#include "aom_dsp/simd/v64_intrinsics_c.h"

typedef union {
  uint8_t u8[16];
  uint16_t u16[8];
  uint32_t u32[4];
  uint64_t u64[2];
  int8_t s8[16];
  int16_t s16[8];
  int32_t s32[4];
  int64_t s64[2];
  c_v64 v64[2];
} c_v128;

SIMD_INLINE uint32_t c_v128_low_u32(c_v128 a) { return a.u32[0]; }

SIMD_INLINE c_v64 c_v128_low_v64(c_v128 a) { return a.v64[0]; }

SIMD_INLINE c_v64 c_v128_high_v64(c_v128 a) { return a.v64[1]; }

SIMD_INLINE c_v128 c_v128_from_64(uint64_t hi, uint64_t lo) {
  c_v128 t;
  t.u64[1] = hi;
  t.u64[0] = lo;
  return t;
}

SIMD_INLINE c_v128 c_v128_from_v64(c_v64 hi, c_v64 lo) {
  c_v128 t;
  t.v64[1] = hi;
  t.v64[0] = lo;
  return t;
}

SIMD_INLINE c_v128 c_v128_from_32(uint32_t a, uint32_t b, uint32_t c,
                                  uint32_t d) {
  c_v128 t;
  t.u32[3] = a;
  t.u32[2] = b;
  t.u32[1] = c;
  t.u32[0] = d;
  return t;
}
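
/* Usage sketch (illustrative only, not part of the API): the constructors
   place their first argument in the highest lane, so under that ordering:

     c_v128 x = c_v128_from_32(3, 2, 1, 0);
     // x.u32[3] == 3, x.u32[2] == 2, x.u32[1] == 1, x.u32[0] == 0
     // c_v128_low_u32(x) == 0

   Equivalently, c_v128_from_v64(hi, lo) puts 'hi' in v64[1] and 'lo' in
   v64[0]. */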

SIMD_INLINE c_v128 c_v128_load_unaligned(const void *p) {
  c_v128 t;
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&t;
  int c;
  for (c = 0; c < 16; c++) q[c] = pp[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_load_aligned(const void *p) {
  if (SIMD_CHECK && (uintptr_t)p & 15) {
    fprintf(stderr, "Error: unaligned v128 load at %p\n", p);
    abort();
  }
  return c_v128_load_unaligned(p);
}

SIMD_INLINE void c_v128_store_unaligned(void *p, c_v128 a) {
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&a;
  int c;
  for (c = 0; c < 16; c++) pp[c] = q[c];
}

SIMD_INLINE void c_v128_store_aligned(void *p, c_v128 a) {
  if (SIMD_CHECK && (uintptr_t)p & 15) {
    fprintf(stderr, "Error: unaligned v128 store at %p\n", p);
    abort();
  }
  c_v128_store_unaligned(p, a);
}
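
/* Usage sketch (illustrative only): the aligned variants require a 16-byte
   aligned pointer (enforced with abort() when SIMD_CHECK is enabled), so a
   caller would use suitably aligned storage, e.g. via the DECLARE_ALIGNED
   macro from aom_ports/mem.h (assumed available in the caller):

     DECLARE_ALIGNED(16, uint8_t, buf[16]);
     c_v128 v = c_v128_load_aligned(buf);  // ok: buf is 16-byte aligned
     c_v128_store_aligned(buf, v);
     // For arbitrary pointers, use the *_unaligned() variants instead. */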

SIMD_INLINE c_v128 c_v128_zero() {
  c_v128 t;
  t.u64[1] = t.u64[0] = 0;
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_8(uint8_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_8(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_16(uint16_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_16(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_32(uint32_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_32(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_64(uint64_t x) {
  c_v128 t;
  t.u64[1] = t.u64[0] = x;
  return t;
}

SIMD_INLINE int64_t c_v128_dotp_su8(c_v128 a, c_v128 b) {
  return c_v64_dotp_su8(a.v64[1], b.v64[1]) +
         c_v64_dotp_su8(a.v64[0], b.v64[0]);
}

SIMD_INLINE int64_t c_v128_dotp_s16(c_v128 a, c_v128 b) {
  return c_v64_dotp_s16(a.v64[1], b.v64[1]) +
         c_v64_dotp_s16(a.v64[0], b.v64[0]);
}

SIMD_INLINE int64_t c_v128_dotp_s32(c_v128 a, c_v128 b) {
  // 32 bit products, 64 bit sum
  return (int64_t)(int32_t)((int64_t)a.s32[3] * b.s32[3]) +
         (int64_t)(int32_t)((int64_t)a.s32[2] * b.s32[2]) +
         (int64_t)(int32_t)((int64_t)a.s32[1] * b.s32[1]) +
         (int64_t)(int32_t)((int64_t)a.s32[0] * b.s32[0]);
}

SIMD_INLINE uint64_t c_v128_hadd_u8(c_v128 a) {
  return c_v64_hadd_u8(a.v64[1]) + c_v64_hadd_u8(a.v64[0]);
}

typedef uint32_t c_sad128_internal;

SIMD_INLINE c_sad128_internal c_v128_sad_u8_init() { return 0; }

/* Implementation dependent return value. Result must be finalised with
   v128_sad_u8_sum().
   The result for more than 32 v128_sad_u8() calls is undefined. */
SIMD_INLINE c_sad128_internal c_v128_sad_u8(c_sad128_internal s, c_v128 a,
                                            c_v128 b) {
  int c;
  for (c = 0; c < 16; c++)
    s += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
  return s;
}

SIMD_INLINE uint32_t c_v128_sad_u8_sum(c_sad128_internal s) { return s; }
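
/* Usage sketch (illustrative only; 'src', 'ref' and the strides are
   hypothetical caller-side names): the accumulator is created with the init
   function, fed with at most 32 c_v128_sad_u8() calls, and then finalised
   with the sum function:

     c_sad128_internal acc = c_v128_sad_u8_init();
     for (int row = 0; row < 16; row++)  // 16 calls, within the 32-call limit
       acc = c_v128_sad_u8(acc, c_v128_load_unaligned(src + row * src_stride),
                           c_v128_load_unaligned(ref + row * ref_stride));
     uint32_t sad = c_v128_sad_u8_sum(acc);
*/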

typedef uint32_t c_ssd128_internal;

SIMD_INLINE c_ssd128_internal c_v128_ssd_u8_init() { return 0; }

/* Implementation dependent return value. Result must be finalised with
 * v128_ssd_u8_sum(). */
SIMD_INLINE c_ssd128_internal c_v128_ssd_u8(c_ssd128_internal s, c_v128 a,
                                            c_v128 b) {
  int c;
  for (c = 0; c < 16; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
  return s;
}

SIMD_INLINE uint32_t c_v128_ssd_u8_sum(c_ssd128_internal s) { return s; }
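
/* The SSD accumulator follows the same init/accumulate/sum pattern as the
   SAD accumulator above; a minimal sketch, with 'a' and 'b' standing for any
   two 16-lane u8 vectors:

     c_ssd128_internal acc = c_v128_ssd_u8_init();
     acc = c_v128_ssd_u8(acc, a, b);  // repeat per 16-byte block
     uint32_t ssd = c_v128_ssd_u8_sum(acc);
*/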

SIMD_INLINE c_v128 c_v128_or(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_or(a.v64[1], b.v64[1]),
                         c_v64_or(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_xor(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_xor(a.v64[1], b.v64[1]),
                         c_v64_xor(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_and(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_and(a.v64[1], b.v64[1]),
                         c_v64_and(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_andn(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_andn(a.v64[1], b.v64[1]),
                         c_v64_andn(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_8(a.v64[1], b.v64[1]),
                         c_v64_add_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_16(a.v64[1], b.v64[1]),
                         c_v64_add_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_u8(a.v64[1], b.v64[1]),
                         c_v64_sadd_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_s8(a.v64[1], b.v64[1]),
                         c_v64_sadd_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_s16(a.v64[1], b.v64[1]),
                         c_v64_sadd_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_32(a.v64[1], b.v64[1]),
                         c_v64_add_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_64(c_v128 a, c_v128 b) {
  // Two's complement overflow (silences sanitizers)
  return c_v128_from_64(
      a.v64[1].u64 > ~b.v64[1].u64 ? a.v64[1].u64 - ~b.v64[1].u64 - 1
                                   : a.v64[1].u64 + b.v64[1].u64,
      a.v64[0].u64 > ~b.v64[0].u64 ? a.v64[0].u64 - ~b.v64[0].u64 - 1
                                   : a.v64[0].u64 + b.v64[0].u64);
}

SIMD_INLINE c_v128 c_v128_padd_s16(c_v128 a) {
  c_v128 t;
  t.s32[0] = (int32_t)a.s16[0] + (int32_t)a.s16[1];
  t.s32[1] = (int32_t)a.s16[2] + (int32_t)a.s16[3];
  t.s32[2] = (int32_t)a.s16[4] + (int32_t)a.s16[5];
  t.s32[3] = (int32_t)a.s16[6] + (int32_t)a.s16[7];
  return t;
}

SIMD_INLINE c_v128 c_v128_padd_u8(c_v128 a) {
  c_v128 t;
  t.u16[0] = (uint16_t)a.u8[0] + (uint16_t)a.u8[1];
  t.u16[1] = (uint16_t)a.u8[2] + (uint16_t)a.u8[3];
  t.u16[2] = (uint16_t)a.u8[4] + (uint16_t)a.u8[5];
  t.u16[3] = (uint16_t)a.u8[6] + (uint16_t)a.u8[7];
  t.u16[4] = (uint16_t)a.u8[8] + (uint16_t)a.u8[9];
  t.u16[5] = (uint16_t)a.u8[10] + (uint16_t)a.u8[11];
  t.u16[6] = (uint16_t)a.u8[12] + (uint16_t)a.u8[13];
  t.u16[7] = (uint16_t)a.u8[14] + (uint16_t)a.u8[15];
  return t;
}

SIMD_INLINE c_v128 c_v128_sub_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_8(a.v64[1], b.v64[1]),
                         c_v64_sub_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_u8(a.v64[1], b.v64[1]),
                         c_v64_ssub_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_s8(a.v64[1], b.v64[1]),
                         c_v64_ssub_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_16(a.v64[1], b.v64[1]),
                         c_v64_sub_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_s16(a.v64[1], b.v64[1]),
                         c_v64_ssub_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_u16(a.v64[1], b.v64[1]),
                         c_v64_ssub_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_32(a.v64[1], b.v64[1]),
                         c_v64_sub_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_64(c_v128 a, c_v128 b) {
  // Two's complement underflow (silences sanitizers)
  return c_v128_from_64(
      a.v64[1].u64 < b.v64[1].u64 ? a.v64[1].u64 + ~b.v64[1].u64 + 1
                                  : a.v64[1].u64 - b.v64[1].u64,
      a.v64[0].u64 < b.v64[0].u64 ? a.v64[0].u64 + ~b.v64[0].u64 + 1
                                  : a.v64[0].u64 - b.v64[0].u64);
}

SIMD_INLINE c_v128 c_v128_abs_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_abs_s16(a.v64[1]), c_v64_abs_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_abs_s8(c_v128 a) {
  return c_v128_from_v64(c_v64_abs_s8(a.v64[1]), c_v64_abs_s8(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mul_s16(c_v64 a, c_v64 b) {
  c_v64 lo_bits = c_v64_mullo_s16(a, b);
  c_v64 hi_bits = c_v64_mulhi_s16(a, b);
  return c_v128_from_v64(c_v64_ziphi_16(hi_bits, lo_bits),
                         c_v64_ziplo_16(hi_bits, lo_bits));
}

SIMD_INLINE c_v128 c_v128_mullo_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mullo_s16(a.v64[1], b.v64[1]),
                         c_v64_mullo_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mulhi_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mulhi_s16(a.v64[1], b.v64[1]),
                         c_v64_mulhi_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mullo_s32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mullo_s32(a.v64[1], b.v64[1]),
                         c_v64_mullo_s32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_madd_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_madd_s16(a.v64[1], b.v64[1]),
                         c_v64_madd_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_madd_us8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_madd_us8(a.v64[1], b.v64[1]),
                         c_v64_madd_us8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_avg_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_avg_u8(a.v64[1], b.v64[1]),
                         c_v64_avg_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_rdavg_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_rdavg_u8(a.v64[1], b.v64[1]),
                         c_v64_rdavg_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_rdavg_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_rdavg_u16(a.v64[1], b.v64[1]),
                         c_v64_rdavg_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_avg_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_avg_u16(a.v64[1], b.v64[1]),
                         c_v64_avg_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_u8(a.v64[1], b.v64[1]),
                         c_v64_min_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_u8(a.v64[1], b.v64[1]),
                         c_v64_max_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_s8(a.v64[1], b.v64[1]),
                         c_v64_min_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE uint32_t c_v128_movemask_8(c_v128 a) {
  return ((a.s8[15] < 0) << 15) | ((a.s8[14] < 0) << 14) |
         ((a.s8[13] < 0) << 13) | ((a.s8[12] < 0) << 12) |
         ((a.s8[11] < 0) << 11) | ((a.s8[10] < 0) << 10) |
         ((a.s8[9] < 0) << 9) | ((a.s8[8] < 0) << 8) | ((a.s8[7] < 0) << 7) |
         ((a.s8[6] < 0) << 6) | ((a.s8[5] < 0) << 5) | ((a.s8[4] < 0) << 4) |
         ((a.s8[3] < 0) << 3) | ((a.s8[2] < 0) << 2) | ((a.s8[1] < 0) << 1) |
         ((a.s8[0] < 0) << 0);
}

SIMD_INLINE c_v128 c_v128_blend_8(c_v128 a, c_v128 b, c_v128 c) {
  c_v128 t;
  for (int i = 0; i < 16; i++) t.u8[i] = c.s8[i] < 0 ? b.u8[i] : a.u8[i];
  return t;
}

SIMD_INLINE c_v128 c_v128_max_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_s8(a.v64[1], b.v64[1]),
                         c_v64_max_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_s16(a.v64[1], b.v64[1]),
                         c_v64_min_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_s16(a.v64[1], b.v64[1]),
                         c_v64_max_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? a.s32[c] : b.s32[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_min_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? b.s32[c] : a.s32[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_ziplo_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a.v64[0], b.v64[0]),
                         c_v64_ziplo_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a.v64[1], b.v64[1]),
                         c_v64_ziplo_8(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a.v64[0], b.v64[0]),
                         c_v64_ziplo_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a.v64[1], b.v64[1]),
                         c_v64_ziplo_16(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a.v64[0], b.v64[0]),
                         c_v64_ziplo_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a.v64[1], b.v64[1]),
                         c_v64_ziplo_32(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_64(c_v128 a, c_v128 b) {
  return c_v128_from_v64(a.v64[0], b.v64[0]);
}

SIMD_INLINE c_v128 c_v128_ziphi_64(c_v128 a, c_v128 b) {
  return c_v128_from_v64(a.v64[1], b.v64[1]);
}

SIMD_INLINE c_v128 c_v128_zip_8(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a, b), c_v64_ziplo_8(a, b));
}

SIMD_INLINE c_v128 c_v128_zip_16(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a, b), c_v64_ziplo_16(a, b));
}

SIMD_INLINE c_v128 c_v128_zip_32(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a, b), c_v64_ziplo_32(a, b));
}

SIMD_INLINE c_v128 _c_v128_unzip_8(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u8[15] = b.u8[15];
    t.u8[14] = b.u8[13];
    t.u8[13] = b.u8[11];
    t.u8[12] = b.u8[9];
    t.u8[11] = b.u8[7];
    t.u8[10] = b.u8[5];
    t.u8[9] = b.u8[3];
    t.u8[8] = b.u8[1];
    t.u8[7] = a.u8[15];
    t.u8[6] = a.u8[13];
    t.u8[5] = a.u8[11];
    t.u8[4] = a.u8[9];
    t.u8[3] = a.u8[7];
    t.u8[2] = a.u8[5];
    t.u8[1] = a.u8[3];
    t.u8[0] = a.u8[1];
  } else {
    t.u8[15] = a.u8[14];
    t.u8[14] = a.u8[12];
    t.u8[13] = a.u8[10];
    t.u8[12] = a.u8[8];
    t.u8[11] = a.u8[6];
    t.u8[10] = a.u8[4];
    t.u8[9] = a.u8[2];
    t.u8[8] = a.u8[0];
    t.u8[7] = b.u8[14];
    t.u8[6] = b.u8[12];
    t.u8[5] = b.u8[10];
    t.u8[4] = b.u8[8];
    t.u8[3] = b.u8[6];
    t.u8[2] = b.u8[4];
    t.u8[1] = b.u8[2];
    t.u8[0] = b.u8[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_8(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(a, b, 1)
                           : _c_v128_unzip_8(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_8(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(b, a, 0)
                           : _c_v128_unzip_8(b, a, 1);
}

SIMD_INLINE c_v128 _c_v128_unzip_16(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u16[7] = b.u16[7];
    t.u16[6] = b.u16[5];
    t.u16[5] = b.u16[3];
    t.u16[4] = b.u16[1];
    t.u16[3] = a.u16[7];
    t.u16[2] = a.u16[5];
    t.u16[1] = a.u16[3];
    t.u16[0] = a.u16[1];
  } else {
    t.u16[7] = a.u16[6];
    t.u16[6] = a.u16[4];
    t.u16[5] = a.u16[2];
    t.u16[4] = a.u16[0];
    t.u16[3] = b.u16[6];
    t.u16[2] = b.u16[4];
    t.u16[1] = b.u16[2];
    t.u16[0] = b.u16[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_16(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(a, b, 1)
                           : _c_v128_unzip_16(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_16(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(b, a, 0)
                           : _c_v128_unzip_16(b, a, 1);
}

SIMD_INLINE c_v128 _c_v128_unzip_32(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u32[3] = b.u32[3];
    t.u32[2] = b.u32[1];
    t.u32[1] = a.u32[3];
    t.u32[0] = a.u32[1];
  } else {
    t.u32[3] = a.u32[2];
    t.u32[2] = a.u32[0];
    t.u32[1] = b.u32[2];
    t.u32[0] = b.u32[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_32(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(a, b, 1)
                           : _c_v128_unzip_32(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_32(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(b, a, 0)
                           : _c_v128_unzip_32(b, a, 1);
}

SIMD_INLINE c_v128 c_v128_unpack_u8_s16(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a), c_v64_unpacklo_u8_s16(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_u8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[0]),
                         c_v64_unpacklo_u8_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_u8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[1]),
                         c_v64_unpacklo_u8_s16(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_unpack_s8_s16(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a), c_v64_unpacklo_s8_s16(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_s8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[0]),
                         c_v64_unpacklo_s8_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_s8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[1]),
                         c_v64_unpacklo_s8_s16(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_pack_s32_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s32_s16(a.v64[1], a.v64[0]),
                         c_v64_pack_s32_s16(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s32_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s32_u16(a.v64[1], a.v64[0]),
                         c_v64_pack_s32_u16(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s16_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s16_u8(a.v64[1], a.v64[0]),
                         c_v64_pack_s16_u8(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s16_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s16_s8(a.v64[1], a.v64[0]),
                         c_v64_pack_s16_s8(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpack_u16_s32(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a), c_v64_unpacklo_u16_s32(a));
}

SIMD_INLINE c_v128 c_v128_unpack_s16_s32(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a), c_v64_unpacklo_s16_s32(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_u16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[0]),
                         c_v64_unpacklo_u16_s32(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpacklo_s16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[0]),
                         c_v64_unpacklo_s16_s32(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_u16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[1]),
                         c_v64_unpacklo_u16_s32(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_s16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[1]),
                         c_v64_unpacklo_s16_s32(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_shuffle_8(c_v128 a, c_v128 pattern) {
  c_v128 t;
  int c;
  for (c = 0; c < 16; c++)
    t.u8[c] = a.u8[CONFIG_BIG_ENDIAN ? 15 - (pattern.u8[c] & 15)
                                     : pattern.u8[c] & 15];

  return t;
}

SIMD_INLINE c_v128 c_v128_cmpgt_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpgt_s8(a.v64[1], b.v64[1]),
                         c_v64_cmpgt_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmplt_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmplt_s8(a.v64[1], b.v64[1]),
                         c_v64_cmplt_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpeq_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpeq_8(a.v64[1], b.v64[1]),
                         c_v64_cmpeq_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpgt_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpgt_s16(a.v64[1], b.v64[1]),
                         c_v64_cmpgt_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmplt_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmplt_s16(a.v64[1], b.v64[1]),
                         c_v64_cmplt_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpeq_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpeq_16(a.v64[1], b.v64[1]),
                         c_v64_cmpeq_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpgt_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] > b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_cmplt_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] < b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_cmpeq_32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] == b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_shl_n_byte(c_v128 a, const unsigned int n) {
  if (n < 8)
    return c_v128_from_v64(c_v64_or(c_v64_shl_n_byte(a.v64[1], n),
                                    c_v64_shr_n_byte(a.v64[0], 8 - n)),
                           c_v64_shl_n_byte(a.v64[0], n));
  else
    return c_v128_from_v64(c_v64_shl_n_byte(a.v64[0], n - 8), c_v64_zero());
}

SIMD_INLINE c_v128 c_v128_shr_n_byte(c_v128 a, const unsigned int n) {
  if (n < 8)
    return c_v128_from_v64(c_v64_shr_n_byte(a.v64[1], n),
                           c_v64_or(c_v64_shr_n_byte(a.v64[0], n),
                                    c_v64_shl_n_byte(a.v64[1], 8 - n)));
  else
    return c_v128_from_v64(c_v64_zero(), c_v64_shr_n_byte(a.v64[1], n - 8));
}

SIMD_INLINE c_v128 c_v128_align(c_v128 a, c_v128 b, const unsigned int c) {
  if (SIMD_CHECK && c > 15) {
    fprintf(stderr, "Error: undefined alignment %d\n", c);
    abort();
  }
  return c ? c_v128_or(c_v128_shr_n_byte(b, c), c_v128_shl_n_byte(a, 16 - c))
           : b;
}
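
/* Illustrative sketch (not part of the API): c_v128_align(a, b, c) behaves
   like a byte-wise extract from the 256-bit concatenation of 'a' (high half)
   and 'b' (low half), starting c bytes above the bottom of 'b'. For example,
   with c == 4 the low 4 bytes of 'a' land in the top of the result, above
   the high 12 bytes of 'b':

     c_v128 hi = c_v128_dup_8(0xaa);
     c_v128 lo = c_v128_dup_8(0x55);
     c_v128 r = c_v128_align(hi, lo, 4);
     // r.u8[0..11] == 0x55 (from 'lo'), r.u8[12..15] == 0xaa (from 'hi')
*/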

SIMD_INLINE c_v128 c_v128_shl_8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_8(a.v64[1], c), c_v64_shl_8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u8(a.v64[1], c), c_v64_shr_u8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s8(a.v64[1], c), c_v64_shr_s8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_16(a.v64[1], c), c_v64_shl_16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u16(a.v64[1], c),
                         c_v64_shr_u16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s16(a.v64[1], c),
                         c_v64_shr_s16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_32(a.v64[1], c), c_v64_shl_32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u32(a.v64[1], c),
                         c_v64_shr_u32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s32(a.v64[1], c),
                         c_v64_shr_s32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_64(c_v128 a, const unsigned int c) {
  a.v64[1].u64 <<= c;
  a.v64[0].u64 <<= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shr_u64(c_v128 a, const unsigned int c) {
  a.v64[1].u64 >>= c;
  a.v64[0].u64 >>= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shr_s64(c_v128 a, const unsigned int c) {
  a.v64[1].s64 >>= c;
  a.v64[0].s64 >>= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shl_n_8(c_v128 a, const unsigned int n) {
  return c_v128_shl_8(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_16(c_v128 a, const unsigned int n) {
  return c_v128_shl_16(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_32(c_v128 a, const unsigned int n) {
  return c_v128_shl_32(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_64(c_v128 a, const unsigned int n) {
  return c_v128_shl_64(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u8(c_v128 a, const unsigned int n) {
  return c_v128_shr_u8(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u16(c_v128 a, const unsigned int n) {
  return c_v128_shr_u16(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u32(c_v128 a, const unsigned int n) {
  return c_v128_shr_u32(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u64(c_v128 a, const unsigned int n) {
  return c_v128_shr_u64(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s8(c_v128 a, const unsigned int n) {
  return c_v128_shr_s8(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s16(c_v128 a, const unsigned int n) {
  return c_v128_shr_s16(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s32(c_v128 a, const unsigned int n) {
  return c_v128_shr_s32(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s64(c_v128 a, const unsigned int n) {
  return c_v128_shr_s64(a, n);
}

typedef uint32_t c_sad128_internal_u16;

SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16_init() { return 0; }

/* Implementation dependent return value. Result must be finalised with
 * v128_sad_u16_sum(). */
SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16(c_sad128_internal_u16 s,
                                                 c_v128 a, c_v128 b) {
  int c;
  for (c = 0; c < 8; c++)
    s += a.u16[c] > b.u16[c] ? a.u16[c] - b.u16[c] : b.u16[c] - a.u16[c];
  return s;
}

SIMD_INLINE uint32_t c_v128_sad_u16_sum(c_sad128_internal_u16 s) { return s; }

typedef uint64_t c_ssd128_internal_s16;

SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16_init() { return 0; }

/* Implementation dependent return value. Result must be finalised with
 * v128_ssd_s16_sum(). */
SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16(c_ssd128_internal_s16 s,
                                                 c_v128 a, c_v128 b) {
  int c;
  for (c = 0; c < 8; c++)
    s += (int32_t)(int16_t)(a.s16[c] - b.s16[c]) *
         (int32_t)(int16_t)(a.s16[c] - b.s16[c]);
  return s;
}

SIMD_INLINE uint64_t c_v128_ssd_s16_sum(c_ssd128_internal_s16 s) { return s; }
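
/* Usage sketch (illustrative only; 'a' and 'b' stand for any two 8-lane s16
   vectors): the 16-bit SSD accumulator is 64 bits wide and follows the same
   init/accumulate/sum pattern; unlike c_v128_sad_u8(), no call-count limit
   is documented for it here:

     c_ssd128_internal_s16 acc = c_v128_ssd_s16_init();
     acc = c_v128_ssd_s16(acc, a, b);  // repeat per 8-lane block of s16 data
     uint64_t ssd = c_v128_ssd_s16_sum(acc);
*/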

#endif  // AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_