1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v. Negative inputs are negated, so v must not be
// the most negative int (its negation is not representable).
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Point-samples one row to half width by keeping the second pixel of every
// horizontal pair. src_stride is unused: only a single source row is read.
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int remaining;
  (void)src_stride;
  // Two outputs per pass; an odd leftover is handled after the loop.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
47
// 16-bit sample variant of the half-width point sampler: every output is the
// second sample of a source pair. src_stride is unused.
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
    remaining -= 2;
  }
  // Odd widths have one more output to emit.
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
64
// Halves a row horizontally: each output is the rounded average of two
// adjacent source pixels. src_stride is unused (single source row).
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  const uint8_t* in = src_ptr;
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (in[0] + in[1] + 1) >> 1;  // +1 rounds to nearest.
    *dst++ = (in[2] + in[3] + 1) >> 1;
    in += 4;
  }
  if (dst_width & 1) {
    *dst = (in[0] + in[1] + 1) >> 1;
  }
}
82
// 16-bit sample variant of the horizontal 2:1 averaging filter: each output is
// the rounded mean of two neighbouring samples. src_stride is unused.
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  const uint16_t* in = src_ptr;
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    *dst++ = (in[0] + in[1] + 1) >> 1;
    *dst++ = (in[2] + in[3] + 1) >> 1;
    in += 4;
    remaining -= 2;
  }
  if (dst_width & 1) {
    *dst = (in[0] + in[1] + 1) >> 1;
  }
}
100
// 2x2 box filter: each output is the rounded average of a 2x2 source block
// taken from the row at src_ptr and the row src_stride bytes after it.
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bottom = src_ptr + src_stride;
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
    *dst++ = (top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2;
    top += 4;
    bottom += 4;
  }
  if (dst_width & 1) {
    *dst = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
  }
}
119
// 2x2 box filter for rows whose final output has only one source column:
// the first dst_width - 1 outputs average a full 2x2 block, and the last
// output averages just the two vertically adjacent pixels.
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bottom = src_ptr + src_stride;
  const int boxed = dst_width - 1;  // Outputs that use a complete 2x2 block.
  int remaining;
  for (remaining = boxed; remaining > 1; remaining -= 2) {
    *dst++ = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
    *dst++ = (top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2;
    top += 4;
    bottom += 4;
  }
  if (boxed & 1) {
    *dst++ = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
    top += 2;
    bottom += 2;
  }
  // Final column: vertical-only average.
  *dst = (top[0] + bottom[0] + 1) >> 1;
}
143
// 16-bit sample variant of the 2x2 box filter. The second row starts
// src_stride elements (not bytes) after src_ptr.
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bottom = src_ptr + src_stride;
  int remaining = dst_width;
  while (remaining > 1) {
    *dst++ = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
    *dst++ = (top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2;
    top += 4;
    bottom += 4;
    remaining -= 2;
  }
  if (dst_width & 1) {
    *dst = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
  }
}
162
// Point-samples one row to quarter width by keeping the pixel at offset 2 of
// every group of four. src_stride is unused.
void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[2];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
179
// 16-bit sample variant of the quarter-width point sampler: keeps the sample
// at offset 2 of every group of four. src_stride is unused.
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    *dst++ = src_ptr[2];
    *dst++ = src_ptr[6];
    src_ptr += 8;
    remaining -= 2;
  }
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
196
// 4x4 box filter: each output is the rounded average of a 4x4 source block
// spanning four consecutive rows that are src_stride bytes apart.
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const intptr_t stride = src_stride;
  // Same output count as a pairwise loop followed by an odd tail: a
  // non-positive odd width still produces a single output.
  int outputs = (dst_width > 0) ? dst_width : (dst_width & 1);
  while (outputs-- > 0) {
    int sum = 8;  // Rounding bias for the divide by 16.
    int row;
    for (row = 0; row < 4; ++row) {
      const uint8_t* line = src_ptr + stride * row;
      sum += line[0] + line[1] + line[2] + line[3];
    }
    *dst++ = sum >> 4;
    src_ptr += 4;
  }
}
234
// 16-bit sample variant of the 4x4 box filter. Rows are src_stride elements
// apart; each output is the rounded average of 16 samples.
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const intptr_t stride = src_stride;
  // Same output count as a pairwise loop followed by an odd tail: a
  // non-positive odd width still produces a single output.
  int outputs = (dst_width > 0) ? dst_width : (dst_width & 1);
  while (outputs-- > 0) {
    int sum = 8;  // Rounding bias for the divide by 16.
    int row;
    for (row = 0; row < 4; ++row) {
      const uint16_t* line = src_ptr + stride * row;
      sum += line[0] + line[1] + line[2] + line[3];
    }
    *dst++ = sum >> 4;
    src_ptr += 4;
  }
}
272
// Point-samples 4 source pixels down to 3 by keeping pixels 0, 1 and 3 of each
// group. dst_width must be a positive multiple of 3. src_stride is unused.
void ScaleRowDown34_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
}
288
// 16-bit sample variant of the 4 -> 3 point sampler: keeps samples 0, 1 and 3
// of each group of four. dst_width must be a positive multiple of 3.
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  remaining = dst_width;
  while (remaining > 0) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
    remaining -= 3;
  }
}
304
// 4 -> 3 horizontal filter applied to two rows, then blended vertically with
// the first row weighted 3 and the second row weighted 1.
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal taps per row: 3:1, 1:1 and 1:3 over four source pixels.
    const int h0[3] = {(row0[0] * 3 + row0[1] + 2) >> 2,
                       (row0[1] + row0[2] + 1) >> 1,
                       (row0[2] + row0[3] * 3 + 2) >> 2};
    const int h1[3] = {(row1[0] * 3 + row1[1] + 2) >> 2,
                       (row1[1] + row1[2] + 1) >> 1,
                       (row1[2] + row1[3] * 3 + 2) >> 2};
    int k;
    for (k = 0; k < 3; ++k) {
      d[k] = (h0[k] * 3 + h1[k] + 2) >> 2;
    }
    d += 3;
    row0 += 4;
    row1 += 4;
  }
}
329
// 16-bit sample variant of the 4 -> 3 filter with a 3:1 vertical blend of the
// row at src_ptr and the row src_stride elements after it.
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal taps per row: 3:1, 1:1 and 1:3 over four source samples.
    const int h0[3] = {(row0[0] * 3 + row0[1] + 2) >> 2,
                       (row0[1] + row0[2] + 1) >> 1,
                       (row0[2] + row0[3] * 3 + 2) >> 2};
    const int h1[3] = {(row1[0] * 3 + row1[1] + 2) >> 2,
                       (row1[1] + row1[2] + 1) >> 1,
                       (row1[2] + row1[3] * 3 + 2) >> 2};
    int k;
    for (k = 0; k < 3; ++k) {
      d[k] = (h0[k] * 3 + h1[k] + 2) >> 2;
    }
    d += 3;
    row0 += 4;
    row1 += 4;
  }
}
353
// 4 -> 3 horizontal filter applied to two rows, then blended vertically with
// equal weights (1 : 1). dst_width must be a positive multiple of 3.
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal taps per row: 3:1, 1:1 and 1:3 over four source pixels.
    const int h0[3] = {(row0[0] * 3 + row0[1] + 2) >> 2,
                       (row0[1] + row0[2] + 1) >> 1,
                       (row0[2] + row0[3] * 3 + 2) >> 2};
    const int h1[3] = {(row1[0] * 3 + row1[1] + 2) >> 2,
                       (row1[1] + row1[2] + 1) >> 1,
                       (row1[2] + row1[3] * 3 + 2) >> 2};
    int k;
    for (k = 0; k < 3; ++k) {
      d[k] = (h0[k] + h1[k] + 1) >> 1;
    }
    d += 3;
    row0 += 4;
    row1 += 4;
  }
}
378
// 16-bit sample variant of the 4 -> 3 filter with an equal-weight vertical
// blend of the row at src_ptr and the row src_stride elements after it.
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal taps per row: 3:1, 1:1 and 1:3 over four source samples.
    const int h0[3] = {(row0[0] * 3 + row0[1] + 2) >> 2,
                       (row0[1] + row0[2] + 1) >> 1,
                       (row0[2] + row0[3] * 3 + 2) >> 2};
    const int h1[3] = {(row1[0] * 3 + row1[1] + 2) >> 2,
                       (row1[1] + row1[2] + 1) >> 1,
                       (row1[2] + row1[3] * 3 + 2) >> 2};
    int k;
    for (k = 0; k < 3; ++k) {
      d[k] = (h0[k] + h1[k] + 1) >> 1;
    }
    d += 3;
    row0 += 4;
    row1 += 4;
  }
}
402
// Point-samples a row horizontally. x is a fixed-point source position whose
// bits above 16 select the source pixel; it advances by dx per output.
void ScaleCols_C(uint8_t* dst_ptr,
                 const uint8_t* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
  }
  if (dst_width & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
421
// 16-bit sample variant of the horizontal point sampler. x is a fixed-point
// source position (sample index is x >> 16) that advances by dx per output.
void ScaleCols_16_C(uint16_t* dst_ptr,
                    const uint16_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining = dst_width;
  while (remaining > 1) {
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
    remaining -= 2;
  }
  if (dst_width & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
439
// Doubles a row horizontally by writing every source pixel twice.
// x and dx are accepted for signature compatibility and ignored.
void ScaleColsUp2_C(uint8_t* dst_ptr,
                    const uint8_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8_t pixel = *src_ptr++;
    dst_ptr[0] = pixel;
    dst_ptr[1] = pixel;
    dst_ptr += 2;
  }
  // An odd width ends with a single copy of the next source pixel.
  if (dst_width & 1) {
    *dst_ptr = *src_ptr;
  }
}
458
// 16-bit sample variant of the 2x horizontal upsampler: every source sample is
// written twice. x and dx are ignored.
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                       const uint16_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining = dst_width;
  (void)x;
  (void)dx;
  while (remaining > 1) {
    const uint16_t sample = *src_ptr++;
    dst_ptr[0] = sample;
    dst_ptr[1] = sample;
    dst_ptr += 2;
    remaining -= 2;
  }
  // An odd width ends with a single copy of the next source sample.
  if (dst_width & 1) {
    *dst_ptr = *src_ptr;
  }
}
476
// Linear blend of two 8-bit pixels: (1-f)a + fb, computed as a + f(b-a).
// f is the 16-bit fraction (0..0xffff) of the fixed-point source position.
static __inline uint8_t ScaleBlend8(int a, int b, int f) {
#if defined(__arm__) || defined(__aarch64__)
  // Full 16-bit fraction with rounding.
  return (uint8_t)(a + ((f * (b - a) + 0x8000) >> 16));
#else
  // Intel uses 7 bit math with rounding.
  return (uint8_t)(a + (((f >> 9) * (b - a) + 0x40) >> 7));
#endif
}

// Scales a row horizontally with linear filtering. For every output, the
// source pixel at x >> 16 is blended with its right-hand neighbour using the
// low 16 bits of x as the fraction; x then advances by dx.
void ScaleFilterCols_C(uint8_t* dst_ptr,
                       const uint8_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8_t* p = src_ptr + (x >> 16);
    *dst_ptr++ = ScaleBlend8(p[0], p[1], x & 0xffff);
    x += dx;
    p = src_ptr + (x >> 16);
    *dst_ptr++ = ScaleBlend8(p[0], p[1], x & 0xffff);
    x += dx;
  }
  if (dst_width & 1) {
    const uint8_t* p = src_ptr + (x >> 16);
    *dst_ptr = ScaleBlend8(p[0], p[1], x & 0xffff);
  }
}

// Same as ScaleFilterCols_C, but the source position is tracked in 64 bits so
// that it can grow past the range of a 32-bit int.
void ScaleFilterCols64_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  int64_t pos = (int64_t)(x32);
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8_t* p = src_ptr + (pos >> 16);
    *dst_ptr++ = ScaleBlend8(p[0], p[1], (int)(pos & 0xffff));
    pos += dx;
    p = src_ptr + (pos >> 16);
    *dst_ptr++ = ScaleBlend8(p[0], p[1], (int)(pos & 0xffff));
    pos += dx;
  }
  if (dst_width & 1) {
    const uint8_t* p = src_ptr + (pos >> 16);
    *dst_ptr = ScaleBlend8(p[0], p[1], (int)(pos & 0xffff));
  }
}
542
// Linear blend of two 16-bit samples, a + f(b-a), using the full 16-bit
// fraction f with rounding. The product is formed in 64 bits.
static __inline uint16_t ScaleBlend16(int a, int b, int f) {
  return (uint16_t)(a + (int)(((int64_t)f * ((int64_t)b - a) + 0x8000) >> 16));
}

// Scales a row of 16-bit samples horizontally with linear filtering. For every
// output, the sample at x >> 16 is blended with its right-hand neighbour using
// the low 16 bits of x as the fraction; x then advances by dx.
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                          const uint16_t* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t* p = src_ptr + (x >> 16);
    *dst_ptr++ = ScaleBlend16(p[0], p[1], x & 0xffff);
    x += dx;
    p = src_ptr + (x >> 16);
    *dst_ptr++ = ScaleBlend16(p[0], p[1], x & 0xffff);
    x += dx;
  }
  if (dst_width & 1) {
    const uint16_t* p = src_ptr + (x >> 16);
    *dst_ptr = ScaleBlend16(p[0], p[1], x & 0xffff);
  }
}

// Same as ScaleFilterCols_16_C, but the source position is tracked in 64 bits
// so that it can grow past the range of a 32-bit int.
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  int64_t pos = (int64_t)(x32);
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t* p = src_ptr + (pos >> 16);
    *dst_ptr++ = ScaleBlend16(p[0], p[1], (int)(pos & 0xffff));
    pos += dx;
    p = src_ptr + (pos >> 16);
    *dst_ptr++ = ScaleBlend16(p[0], p[1], (int)(pos & 0xffff));
    pos += dx;
  }
  if (dst_width & 1) {
    const uint16_t* p = src_ptr + (pos >> 16);
    *dst_ptr = ScaleBlend16(p[0], p[1], (int)(pos & 0xffff));
  }
}
604
// Point-samples 8 source pixels down to 3 by keeping pixels 0, 3 and 6 of each
// group of eight. dst_width must be a multiple of 3. src_stride is unused.
void ScaleRowDown38_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[3];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
}
620
// 16-bit sample variant of the 8 -> 3 point sampler: keeps samples 0, 3 and 6
// of each group of eight. dst_width must be a multiple of 3.
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  remaining = dst_width;
  while (remaining > 0) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[3];
    *dst++ = src_ptr[6];
    src_ptr += 8;
    remaining -= 3;
  }
}
636
// 8x3 -> 3x1
// Each group of 8 columns over 3 rows produces 3 outputs: two averages of a
// 3x3 block and one average of the remaining 2x3 block. The divide is done
// as a multiply by a 16.16 reciprocal (65536 / count) followed by >> 16.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  static const int kFirstCol[3] = {0, 3, 6};
  static const int kNumCols[3] = {3, 3, 2};
  static const int kScale[3] = {65536 / 9, 65536 / 9, 65536 / 6};
  const intptr_t stride = src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    int k;
    for (k = 0; k < 3; ++k) {
      int sum = 0;
      int row;
      for (row = 0; row < 3; ++row) {
        const uint8_t* line = src_ptr + stride * row + kFirstCol[k];
        int col;
        for (col = 0; col < kNumCols[k]; ++col) {
          sum += line[col];
        }
      }
      dst_ptr[k] = (sum * kScale[k]) >> 16;
    }
    src_ptr += 8;
    dst_ptr += 3;
  }
}
667
// 16-bit sample variant of the 8x3 -> 3x1 box filter.
// Each group of 8 columns over 3 rows (rows are src_stride elements apart)
// produces 3 outputs: two averages of a 3x3 block and one average of the
// remaining 2x3 block. The divide is done as a multiply by a 16.16 reciprocal
// (65536 / count) followed by >> 16.
// The sum and the product are kept in uint32_t: with full-range 16-bit
// samples the product can reach 9 * 65535 * 7281 = 4294443015, which exceeds
// INT_MAX (signed overflow) but still fits in 32 unsigned bits.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  static const int kFirstCol[3] = {0, 3, 6};
  static const int kNumCols[3] = {3, 3, 2};
  static const uint32_t kScale[3] = {65536 / 9, 65536 / 9, 65536 / 6};
  const intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    int k;
    for (k = 0; k < 3; ++k) {
      uint32_t sum = 0;
      int row;
      for (row = 0; row < 3; ++row) {
        const uint16_t* line = src_ptr + stride * row + kFirstCol[k];
        int col;
        for (col = 0; col < kNumCols[k]; ++col) {
          sum += line[col];
        }
      }
      dst_ptr[k] = (uint16_t)((sum * kScale[k]) >> 16);
    }
    src_ptr += 8;
    dst_ptr += 3;
  }
}
697
// 8x2 -> 3x1
// Each group of 8 columns over 2 rows produces 3 outputs: two averages of a
// 3x2 block and one average of the remaining 2x2 block. The divide is done
// as a multiply by a 16.16 reciprocal (65536 / count) followed by >> 16.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  static const int kFirstCol[3] = {0, 3, 6};
  static const int kNumCols[3] = {3, 3, 2};
  static const int kScale[3] = {65536 / 6, 65536 / 6, 65536 / 4};
  const intptr_t stride = src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    int k;
    for (k = 0; k < 3; ++k) {
      int sum = 0;
      int row;
      for (row = 0; row < 2; ++row) {
        const uint8_t* line = src_ptr + stride * row + kFirstCol[k];
        int col;
        for (col = 0; col < kNumCols[k]; ++col) {
          sum += line[col];
        }
      }
      dst_ptr[k] = (sum * kScale[k]) >> 16;
    }
    src_ptr += 8;
    dst_ptr += 3;
  }
}
723
// 16-bit sample variant of the 8x2 -> 3x1 box filter.
// Each group of 8 columns over 2 rows (rows are src_stride elements apart)
// produces 3 outputs: two averages of a 3x2 block and one average of the
// remaining 2x2 block. The divide is done as a multiply by a 16.16 reciprocal
// (65536 / count) followed by >> 16.
// The sum and the product are kept in uint32_t: with full-range 16-bit
// samples the product can reach 4 * 65535 * 16384 = 4294901760, which exceeds
// INT_MAX (signed overflow) but still fits in 32 unsigned bits.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  static const int kFirstCol[3] = {0, 3, 6};
  static const int kNumCols[3] = {3, 3, 2};
  static const uint32_t kScale[3] = {65536 / 6, 65536 / 6, 65536 / 4};
  const intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    int k;
    for (k = 0; k < 3; ++k) {
      uint32_t sum = 0;
      int row;
      for (row = 0; row < 2; ++row) {
        const uint16_t* line = src_ptr + stride * row + kFirstCol[k];
        int col;
        for (col = 0; col < kNumCols[k]; ++col) {
          sum += line[col];
        }
      }
      dst_ptr[k] = (uint16_t)((sum * kScale[k]) >> 16);
    }
    src_ptr += 8;
    dst_ptr += 3;
  }
}
748
// Accumulates a row of 8-bit pixels into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i] for src_width pixels. src_width must be positive.
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int remaining;
  assert(src_width > 0);
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
  }
  if (src_width & 1) {
    *dst_ptr += *src_ptr;
  }
}
762
// Accumulates a row of 16-bit samples into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i] for src_width samples. src_width must be positive.
void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  int remaining;
  assert(src_width > 0);
  remaining = src_width;
  while (remaining > 1) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
    remaining -= 2;
  }
  if (src_width & 1) {
    *dst_ptr += *src_ptr;
  }
}
778
// Point-samples a row of 4-byte pixels to half width by keeping the second
// pixel of every pair. Pixels are moved as whole 32-bit words.
// src_stride is unused.
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                         ptrdiff_t src_stride,
                         uint8_t* dst_argb,
                         int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[1];
    *out++ = in[3];
    in += 4;
  }
  if (dst_width & 1) {
    *out = in[1];
  }
}
797
// Halves a row of 4-byte pixels horizontally: each output channel is the
// rounded average of the same channel in two adjacent source pixels.
// src_stride is unused.
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (src_argb[channel] + src_argb[channel + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
813
// 2x2 box filter for 4-byte pixels: each output channel is the rounded
// average of that channel over two adjacent pixels in the row at src_argb and
// in the row src_stride bytes after it.
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
                            int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* below = src_argb + src_stride;
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (src_argb[channel] + src_argb[channel + 4] +
                           below[channel] + below[channel + 4] + 2) >>
                          2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
836
// Point-samples a row of 4-byte pixels by taking every src_stepx-th pixel,
// starting with the first. Pixels are moved as whole 32-bit words.
// src_stride is unused.
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_argb,
                            int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[0];
    *out++ = in[src_stepx];
    in += src_stepx * 2;
  }
  if (dst_width & 1) {
    *out = in[0];
  }
}
856
// Box-filtered stepping sampler for 4-byte pixels: every output is the
// rounded per-channel average of a 2x2 pixel block (two rows src_stride bytes
// apart), and the source then advances by src_stepx pixels.
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* below = src_argb + src_stride;
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (src_argb[channel] + src_argb[channel + 4] +
                           below[channel] + below[channel + 4] + 2) >>
                          2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
880
// Point-samples a row of 4-byte pixels horizontally. x is a fixed-point
// source position whose bits above 16 select the source pixel; it advances by
// dx per output. Pixels are moved as whole 32-bit words.
void ScaleARGBCols_C(uint8_t* dst_argb,
                     const uint8_t* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[x >> 16];
    x += dx;
    *out++ = in[x >> 16];
    x += dx;
  }
  if (dst_width & 1) {
    *out = in[x >> 16];
  }
}
901
// Same as the 32-bit ARGB point sampler, but the fixed-point source position
// is tracked in 64 bits so that it can grow past the range of a 32-bit int.
void ScaleARGBCols64_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int64_t pos = (int64_t)(x32);
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[pos >> 16];
    pos += dx;
    *out++ = in[pos >> 16];
    pos += dx;
  }
  if (dst_width & 1) {
    *out = in[pos >> 16];
  }
}
922
// Doubles a row of 4-byte pixels horizontally by writing every source pixel
// twice. x and dx are accepted for signature compatibility and ignored.
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                        const uint8_t* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32_t pixel = *in++;
    out[0] = pixel;
    out[1] = pixel;
    out += 2;
  }
  // An odd width ends with a single copy of the next source pixel.
  if (dst_width & 1) {
    *out = *in;
  }
}
943
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender.
// Blends two packed 4-byte pixels channel by channel with a 7-bit fraction f
// (0..127): each channel becomes (a * (0x7f ^ f) + b * f) >> 7.
static __inline uint32_t ScaleBlendARGB(uint32_t a, uint32_t b, int f) {
  uint32_t blended = 0;
  int shift;
  for (shift = 24; shift >= 0; shift -= 8) {
    const uint32_t ca = (a >> shift) & 255;
    const uint32_t cb = (b >> shift) & 255;
    blended |= (uint32_t)(((ca * (0x7f ^ f) + cb * f) >> 7) << shift);
  }
  return blended;
}

// Scales a row of 4-byte pixels horizontally with linear filtering. For every
// output, the pixel at x >> 16 is blended with its right-hand neighbour using
// bits 9..15 of x as a 7-bit fraction; x then advances by dx.
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
                           const uint8_t* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32_t* p = in + (x >> 16);
    *out++ = ScaleBlendARGB(p[0], p[1], (x >> 9) & 0x7f);
    x += dx;
    p = in + (x >> 16);
    *out++ = ScaleBlendARGB(p[0], p[1], (x >> 9) & 0x7f);
    x += dx;
  }
  if (dst_width & 1) {
    const uint32_t* p = in + (x >> 16);
    *out = ScaleBlendARGB(p[0], p[1], (x >> 9) & 0x7f);
  }
}

// Same as ScaleARGBFilterCols_C, but the source position is tracked in 64
// bits so that it can grow past the range of a 32-bit int.
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x32,
                             int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int64_t pos = (int64_t)(x32);
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32_t* p = in + (pos >> 16);
    *out++ = ScaleBlendARGB(p[0], p[1], (int)((pos >> 9) & 0x7f));
    pos += dx;
    p = in + (pos >> 16);
    *out++ = ScaleBlendARGB(p[0], p[1], (int)((pos >> 9) & 0x7f));
    pos += dx;
  }
  if (dst_width & 1) {
    const uint32_t* p = in + (pos >> 16);
    *out = ScaleBlendARGB(p[0], p[1], (int)((pos >> 9) & 0x7f));
  }
}
1020
1021 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)1022 void ScalePlaneVertical(int src_height,
1023 int dst_width,
1024 int dst_height,
1025 int src_stride,
1026 int dst_stride,
1027 const uint8_t* src_argb,
1028 uint8_t* dst_argb,
1029 int x,
1030 int y,
1031 int dy,
1032 int bpp,
1033 enum FilterMode filtering) {
1034 // TODO(fbarchard): Allow higher bpp.
1035 int dst_width_bytes = dst_width * bpp;
1036 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
1037 ptrdiff_t src_stride, int dst_width,
1038 int source_y_fraction) = InterpolateRow_C;
1039 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1040 int j;
1041 assert(bpp >= 1 && bpp <= 4);
1042 assert(src_height != 0);
1043 assert(dst_width > 0);
1044 assert(dst_height > 0);
1045 src_argb += (x >> 16) * bpp;
1046 #if defined(HAS_INTERPOLATEROW_SSSE3)
1047 if (TestCpuFlag(kCpuHasSSSE3)) {
1048 InterpolateRow = InterpolateRow_Any_SSSE3;
1049 if (IS_ALIGNED(dst_width_bytes, 16)) {
1050 InterpolateRow = InterpolateRow_SSSE3;
1051 }
1052 }
1053 #endif
1054 #if defined(HAS_INTERPOLATEROW_AVX2)
1055 if (TestCpuFlag(kCpuHasAVX2)) {
1056 InterpolateRow = InterpolateRow_Any_AVX2;
1057 if (IS_ALIGNED(dst_width_bytes, 32)) {
1058 InterpolateRow = InterpolateRow_AVX2;
1059 }
1060 }
1061 #endif
1062 #if defined(HAS_INTERPOLATEROW_NEON)
1063 if (TestCpuFlag(kCpuHasNEON)) {
1064 InterpolateRow = InterpolateRow_Any_NEON;
1065 if (IS_ALIGNED(dst_width_bytes, 16)) {
1066 InterpolateRow = InterpolateRow_NEON;
1067 }
1068 }
1069 #endif
1070 #if defined(HAS_INTERPOLATEROW_MSA)
1071 if (TestCpuFlag(kCpuHasMSA)) {
1072 InterpolateRow = InterpolateRow_Any_MSA;
1073 if (IS_ALIGNED(dst_width_bytes, 32)) {
1074 InterpolateRow = InterpolateRow_MSA;
1075 }
1076 }
1077 #endif
1078 #if defined(HAS_INTERPOLATEROW_MMI)
1079 if (TestCpuFlag(kCpuHasMMI)) {
1080 InterpolateRow = InterpolateRow_Any_MMI;
1081 if (IS_ALIGNED(dst_width_bytes, 8)) {
1082 InterpolateRow = InterpolateRow_MMI;
1083 }
1084 }
1085 #endif
1086 for (j = 0; j < dst_height; ++j) {
1087 int yi;
1088 int yf;
1089 if (y > max_y) {
1090 y = max_y;
1091 }
1092 yi = y >> 16;
1093 yf = filtering ? ((y >> 8) & 255) : 0;
1094 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1095 dst_width_bytes, yf);
1096 dst_argb += dst_stride;
1097 y += dy;
1098 }
1099 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_argb,uint16_t * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)1100 void ScalePlaneVertical_16(int src_height,
1101 int dst_width,
1102 int dst_height,
1103 int src_stride,
1104 int dst_stride,
1105 const uint16_t* src_argb,
1106 uint16_t* dst_argb,
1107 int x,
1108 int y,
1109 int dy,
1110 int wpp,
1111 enum FilterMode filtering) {
1112 // TODO(fbarchard): Allow higher wpp.
1113 int dst_width_words = dst_width * wpp;
1114 void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
1115 ptrdiff_t src_stride, int dst_width,
1116 int source_y_fraction) = InterpolateRow_16_C;
1117 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1118 int j;
1119 assert(wpp >= 1 && wpp <= 2);
1120 assert(src_height != 0);
1121 assert(dst_width > 0);
1122 assert(dst_height > 0);
1123 src_argb += (x >> 16) * wpp;
1124 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1125 if (TestCpuFlag(kCpuHasSSE2)) {
1126 InterpolateRow = InterpolateRow_Any_16_SSE2;
1127 if (IS_ALIGNED(dst_width_bytes, 16)) {
1128 InterpolateRow = InterpolateRow_16_SSE2;
1129 }
1130 }
1131 #endif
1132 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1133 if (TestCpuFlag(kCpuHasSSSE3)) {
1134 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1135 if (IS_ALIGNED(dst_width_bytes, 16)) {
1136 InterpolateRow = InterpolateRow_16_SSSE3;
1137 }
1138 }
1139 #endif
1140 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1141 if (TestCpuFlag(kCpuHasAVX2)) {
1142 InterpolateRow = InterpolateRow_Any_16_AVX2;
1143 if (IS_ALIGNED(dst_width_bytes, 32)) {
1144 InterpolateRow = InterpolateRow_16_AVX2;
1145 }
1146 }
1147 #endif
1148 #if defined(HAS_INTERPOLATEROW_16_NEON)
1149 if (TestCpuFlag(kCpuHasNEON)) {
1150 InterpolateRow = InterpolateRow_Any_16_NEON;
1151 if (IS_ALIGNED(dst_width_bytes, 16)) {
1152 InterpolateRow = InterpolateRow_16_NEON;
1153 }
1154 }
1155 #endif
1156 for (j = 0; j < dst_height; ++j) {
1157 int yi;
1158 int yf;
1159 if (y > max_y) {
1160 y = max_y;
1161 }
1162 yi = y >> 16;
1163 yf = filtering ? ((y >> 8) & 255) : 0;
1164 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1165 dst_width_words, yf);
1166 dst_argb += dst_stride;
1167 y += dy;
1168 }
1169 }
1170
1171 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1172 enum FilterMode ScaleFilterReduce(int src_width,
1173 int src_height,
1174 int dst_width,
1175 int dst_height,
1176 enum FilterMode filtering) {
1177 if (src_width < 0) {
1178 src_width = -src_width;
1179 }
1180 if (src_height < 0) {
1181 src_height = -src_height;
1182 }
1183 if (filtering == kFilterBox) {
1184 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1185 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1186 filtering = kFilterBilinear;
1187 }
1188 }
1189 if (filtering == kFilterBilinear) {
1190 if (src_height == 1) {
1191 filtering = kFilterLinear;
1192 }
1193 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1194 if (dst_height == src_height || dst_height * 3 == src_height) {
1195 filtering = kFilterLinear;
1196 }
1197 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1198 // avoid reading 2 pixels horizontally that causes memory exception.
1199 if (src_width == 1) {
1200 filtering = kFilterNone;
1201 }
1202 }
1203 if (filtering == kFilterLinear) {
1204 if (src_width == 1) {
1205 filtering = kFilterNone;
1206 }
1207 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1208 if (dst_width == src_width || dst_width * 3 == src_width) {
1209 filtering = kFilterNone;
1210 }
1211 }
1212 return filtering;
1213 }
1214
// Divide num by div and return as 16.16 fixed point result.
// The quotient is computed in 64 bits, truncated toward zero, then converted
// to int. div must be non-zero.
int FixedDiv_C(int num, int div) {
  // Multiply instead of shifting left: a left shift of a negative signed
  // value is undefined behavior, while this product always fits in int64_t.
  const int64_t scaled = (int64_t)(num) * 65536;
  return (int)(scaled / div);
}
1219
// Returns ((num << 16) - 0x00010001) / (div - 1) as a 16.16 fixed point
// value: num - 1 in 16.16, less one fixed point unit, divided by div - 1.
// The quotient is computed in 64 bits, truncated toward zero, then converted
// to int. div must not be 1 (division by zero); ScaleSlope only calls this
// with a destination size greater than 1.
int FixedDiv1_C(int num, int div) {
  // Multiply instead of shifting left: a left shift of a negative signed
  // value is undefined behavior, while this product always fits in int64_t.
  const int64_t scaled = (int64_t)(num) * 65536;
  return (int)((scaled - 0x00010001) / (div - 1));
}
1224
1225 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1226
1227 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1228 void ScaleSlope(int src_width,
1229 int src_height,
1230 int dst_width,
1231 int dst_height,
1232 enum FilterMode filtering,
1233 int* x,
1234 int* y,
1235 int* dx,
1236 int* dy) {
1237 assert(x != NULL);
1238 assert(y != NULL);
1239 assert(dx != NULL);
1240 assert(dy != NULL);
1241 assert(src_width != 0);
1242 assert(src_height != 0);
1243 assert(dst_width > 0);
1244 assert(dst_height > 0);
1245 // Check for 1 pixel and avoid FixedDiv overflow.
1246 if (dst_width == 1 && src_width >= 32768) {
1247 dst_width = src_width;
1248 }
1249 if (dst_height == 1 && src_height >= 32768) {
1250 dst_height = src_height;
1251 }
1252 if (filtering == kFilterBox) {
1253 // Scale step for point sampling duplicates all pixels equally.
1254 *dx = FixedDiv(Abs(src_width), dst_width);
1255 *dy = FixedDiv(src_height, dst_height);
1256 *x = 0;
1257 *y = 0;
1258 } else if (filtering == kFilterBilinear) {
1259 // Scale step for bilinear sampling renders last pixel once for upsample.
1260 if (dst_width <= Abs(src_width)) {
1261 *dx = FixedDiv(Abs(src_width), dst_width);
1262 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1263 } else if (dst_width > 1) {
1264 *dx = FixedDiv1(Abs(src_width), dst_width);
1265 *x = 0;
1266 }
1267 if (dst_height <= src_height) {
1268 *dy = FixedDiv(src_height, dst_height);
1269 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1270 } else if (dst_height > 1) {
1271 *dy = FixedDiv1(src_height, dst_height);
1272 *y = 0;
1273 }
1274 } else if (filtering == kFilterLinear) {
1275 // Scale step for bilinear sampling renders last pixel once for upsample.
1276 if (dst_width <= Abs(src_width)) {
1277 *dx = FixedDiv(Abs(src_width), dst_width);
1278 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1279 } else if (dst_width > 1) {
1280 *dx = FixedDiv1(Abs(src_width), dst_width);
1281 *x = 0;
1282 }
1283 *dy = FixedDiv(src_height, dst_height);
1284 *y = *dy >> 1;
1285 } else {
1286 // Scale step for point sampling duplicates all pixels equally.
1287 *dx = FixedDiv(Abs(src_width), dst_width);
1288 *dy = FixedDiv(src_height, dst_height);
1289 *x = CENTERSTART(*dx, 0);
1290 *y = CENTERSTART(*dy, 0);
1291 }
1292 // Negative src_width means horizontally mirror.
1293 if (src_width < 0) {
1294 *x += (dst_width - 1) * *dx;
1295 *dx = -*dx;
1296 // src_width = -src_width; // Caller must do this.
1297 }
1298 }
1299 #undef CENTERSTART
1300
// Weighted average of four samples with weights 9:3:3:1, rounded and divided
// by 16. a gets weight 9, b and c weight 3, d weight 1.
static __inline uint16_t ScaleRowUp2Filter_16(int a, int b, int c, int d) {
  return (uint16_t)((a * 9 + b * 3 + c * 3 + d + 8) >> 4);
}

// Upsample a row of 16 bit samples to twice the width with filtering.
// Each pair of outputs is built from two adjacent samples of the row at
// src_ptr and the two samples below them in the row at src_ptr + src_stride
// (src_stride counted in uint16_t elements). The source advances one sample
// per output pair, so one sample beyond the last pair's position is read
// from each row.
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bottom = src_ptr + src_stride;
  int remaining;

  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const int tl = top[0];
    const int tr = top[1];
    const int bl = bottom[0];
    const int br = bottom[1];
    dst[0] = ScaleRowUp2Filter_16(tl, tr, bl, br);  // Nearest to left column.
    dst[1] = ScaleRowUp2Filter_16(tr, tl, br, bl);  // Nearest to right column.
    ++top;
    ++bottom;
    dst += 2;
  }
  // Odd width: emit only the left output of the final pair.
  if (dst_width & 1) {
    const int tl = top[0];
    const int tr = top[1];
    const int bl = bottom[0];
    const int br = bottom[1];
    dst[0] = ScaleRowUp2Filter_16(tl, tr, bl, br);
  }
}
1329
1330 #ifdef __cplusplus
1331 } // extern "C"
1332 } // namespace libyuv
1333 #endif
1334