1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the absolute value of v. v must not be INT_MIN, because its
// negation does not fit in an int.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
// Computes (|v| + a) >> s and gives the result the sign of v, so negative
// values are subsampled symmetrically with positive ones.
// Every argument and the whole expansion are parenthesized so the macro can
// be used inside larger expressions and with compound arguments. Arguments
// are evaluated more than once and must be free of side effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
31
32 // Scale plane, 1/2
33 // This is an optimized version for scaling down a plane to 1/2 of
34 // its original size.
35
ScalePlaneDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr,enum FilterMode filtering)36 static void ScalePlaneDown2(int src_width,
37 int src_height,
38 int dst_width,
39 int dst_height,
40 int src_stride,
41 int dst_stride,
42 const uint8* src_ptr,
43 uint8* dst_ptr,
44 enum FilterMode filtering) {
45 int y;
46 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
47 uint8* dst_ptr, int dst_width) =
48 filtering == kFilterNone
49 ? ScaleRowDown2_C
50 : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
51 : ScaleRowDown2Box_C);
52 int row_stride = src_stride << 1;
53 (void)src_width;
54 (void)src_height;
55 if (!filtering) {
56 src_ptr += src_stride; // Point to odd rows.
57 src_stride = 0;
58 }
59
60 #if defined(HAS_SCALEROWDOWN2_NEON)
61 if (TestCpuFlag(kCpuHasNEON)) {
62 ScaleRowDown2 =
63 filtering == kFilterNone
64 ? ScaleRowDown2_Any_NEON
65 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
66 : ScaleRowDown2Box_Any_NEON);
67 if (IS_ALIGNED(dst_width, 16)) {
68 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
69 : (filtering == kFilterLinear
70 ? ScaleRowDown2Linear_NEON
71 : ScaleRowDown2Box_NEON);
72 }
73 }
74 #endif
75 #if defined(HAS_SCALEROWDOWN2_SSSE3)
76 if (TestCpuFlag(kCpuHasSSSE3)) {
77 ScaleRowDown2 =
78 filtering == kFilterNone
79 ? ScaleRowDown2_Any_SSSE3
80 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
81 : ScaleRowDown2Box_Any_SSSE3);
82 if (IS_ALIGNED(dst_width, 16)) {
83 ScaleRowDown2 =
84 filtering == kFilterNone
85 ? ScaleRowDown2_SSSE3
86 : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
87 : ScaleRowDown2Box_SSSE3);
88 }
89 }
90 #endif
91 #if defined(HAS_SCALEROWDOWN2_AVX2)
92 if (TestCpuFlag(kCpuHasAVX2)) {
93 ScaleRowDown2 =
94 filtering == kFilterNone
95 ? ScaleRowDown2_Any_AVX2
96 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
97 : ScaleRowDown2Box_Any_AVX2);
98 if (IS_ALIGNED(dst_width, 32)) {
99 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
100 : (filtering == kFilterLinear
101 ? ScaleRowDown2Linear_AVX2
102 : ScaleRowDown2Box_AVX2);
103 }
104 }
105 #endif
106 #if defined(HAS_SCALEROWDOWN2_DSPR2)
107 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
108 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
109 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
110 ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2;
111 }
112 #endif
113 #if defined(HAS_SCALEROWDOWN2_MSA)
114 if (TestCpuFlag(kCpuHasMSA)) {
115 ScaleRowDown2 =
116 filtering == kFilterNone
117 ? ScaleRowDown2_Any_MSA
118 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
119 : ScaleRowDown2Box_Any_MSA);
120 if (IS_ALIGNED(dst_width, 32)) {
121 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
122 : (filtering == kFilterLinear
123 ? ScaleRowDown2Linear_MSA
124 : ScaleRowDown2Box_MSA);
125 }
126 }
127 #endif
128
129 if (filtering == kFilterLinear) {
130 src_stride = 0;
131 }
132 // TODO(fbarchard): Loop through source height to allow odd height.
133 for (y = 0; y < dst_height; ++y) {
134 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
135 src_ptr += row_stride;
136 dst_ptr += dst_stride;
137 }
138 }
139
// 16-bit-per-sample variant of the 1/2 plane scaler.
//
// One row-function call produces each of the dst_height output rows, and the
// source pointer advances two rows per output row. With kFilterNone only the
// odd source rows are read; with kFilterLinear the row function is called
// with a stride of 0; otherwise it receives the real source stride.
// Strides are applied to uint16 pointers, so they count elements.
// src_width and src_height are not used.
static void ScalePlaneDown2_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16* src_ptr,
                               uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering == kFilterNone
          ? ScaleRowDown2_16_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
                                        : ScaleRowDown2Box_16_C);
  // Distance between the source rows of consecutive output rows. Multiply
  // rather than shift so a negative src_stride stays well defined
  // (left-shifting a negative int is undefined behavior in C).
  int row_stride = src_stride * 2;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 =
        filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_16_SSE2
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
                                          : ScaleRowDown2Box_16_SSE2);
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 =
        filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2;
  }
#endif

  // Linear filtering is horizontal only: hand the row function a zero stride
  // so it never reaches into the next source row.
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
198
199 // Scale plane, 1/4
200 // This is an optimized version for scaling down a plane to 1/4 of
201 // its original size.
202
ScalePlaneDown4(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr,enum FilterMode filtering)203 static void ScalePlaneDown4(int src_width,
204 int src_height,
205 int dst_width,
206 int dst_height,
207 int src_stride,
208 int dst_stride,
209 const uint8* src_ptr,
210 uint8* dst_ptr,
211 enum FilterMode filtering) {
212 int y;
213 void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
214 uint8* dst_ptr, int dst_width) =
215 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
216 int row_stride = src_stride << 2;
217 (void)src_width;
218 (void)src_height;
219 if (!filtering) {
220 src_ptr += src_stride * 2; // Point to row 2.
221 src_stride = 0;
222 }
223 #if defined(HAS_SCALEROWDOWN4_NEON)
224 if (TestCpuFlag(kCpuHasNEON)) {
225 ScaleRowDown4 =
226 filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
227 if (IS_ALIGNED(dst_width, 8)) {
228 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
229 }
230 }
231 #endif
232 #if defined(HAS_SCALEROWDOWN4_SSSE3)
233 if (TestCpuFlag(kCpuHasSSSE3)) {
234 ScaleRowDown4 =
235 filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
236 if (IS_ALIGNED(dst_width, 8)) {
237 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
238 }
239 }
240 #endif
241 #if defined(HAS_SCALEROWDOWN4_AVX2)
242 if (TestCpuFlag(kCpuHasAVX2)) {
243 ScaleRowDown4 =
244 filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
245 if (IS_ALIGNED(dst_width, 16)) {
246 ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
247 }
248 }
249 #endif
250 #if defined(HAS_SCALEROWDOWN4_DSPR2)
251 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
252 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
253 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
254 ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2;
255 }
256 #endif
257 #if defined(HAS_SCALEROWDOWN4_MSA)
258 if (TestCpuFlag(kCpuHasMSA)) {
259 ScaleRowDown4 =
260 filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
261 if (IS_ALIGNED(dst_width, 16)) {
262 ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
263 }
264 }
265 #endif
266
267 if (filtering == kFilterLinear) {
268 src_stride = 0;
269 }
270 for (y = 0; y < dst_height; ++y) {
271 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
272 src_ptr += row_stride;
273 dst_ptr += dst_stride;
274 }
275 }
276
// 16-bit-per-sample variant of the 1/4 plane scaler.
//
// One row-function call produces each of the dst_height output rows, and the
// source pointer advances four rows per output row. Without filtering the
// ScaleRowDown4_16* functions read from row 2 of each group of four; with
// filtering the Box functions are used, and kFilterLinear passes them a
// stride of 0. Strides are applied to uint16 pointers, so they count
// elements. src_width and src_height are not used.
static void ScalePlaneDown4_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16* src_ptr,
                               uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  // Distance between the source rows of consecutive output rows. Multiply
  // rather than shift so a negative src_stride stays well defined
  // (left-shifting a negative int is undefined behavior in C).
  int row_stride = src_stride * 4;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2;
  }
#endif

  // Linear filtering is horizontal only: a zero stride keeps the box function
  // on a single source row.
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
327
328 // Scale plane down, 3/4
ScalePlaneDown34(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr,enum FilterMode filtering)329 static void ScalePlaneDown34(int src_width,
330 int src_height,
331 int dst_width,
332 int dst_height,
333 int src_stride,
334 int dst_stride,
335 const uint8* src_ptr,
336 uint8* dst_ptr,
337 enum FilterMode filtering) {
338 int y;
339 void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
340 uint8* dst_ptr, int dst_width);
341 void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
342 uint8* dst_ptr, int dst_width);
343 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
344 (void)src_width;
345 (void)src_height;
346 assert(dst_width % 3 == 0);
347 if (!filtering) {
348 ScaleRowDown34_0 = ScaleRowDown34_C;
349 ScaleRowDown34_1 = ScaleRowDown34_C;
350 } else {
351 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
352 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
353 }
354 #if defined(HAS_SCALEROWDOWN34_NEON)
355 if (TestCpuFlag(kCpuHasNEON)) {
356 if (!filtering) {
357 ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
358 ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
359 } else {
360 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
361 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
362 }
363 if (dst_width % 24 == 0) {
364 if (!filtering) {
365 ScaleRowDown34_0 = ScaleRowDown34_NEON;
366 ScaleRowDown34_1 = ScaleRowDown34_NEON;
367 } else {
368 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
369 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
370 }
371 }
372 }
373 #endif
374 #if defined(HAS_SCALEROWDOWN34_SSSE3)
375 if (TestCpuFlag(kCpuHasSSSE3)) {
376 if (!filtering) {
377 ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
378 ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
379 } else {
380 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
381 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
382 }
383 if (dst_width % 24 == 0) {
384 if (!filtering) {
385 ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
386 ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
387 } else {
388 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
389 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
390 }
391 }
392 }
393 #endif
394 #if defined(HAS_SCALEROWDOWN34_DSPR2)
395 if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
396 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
397 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
398 if (!filtering) {
399 ScaleRowDown34_0 = ScaleRowDown34_DSPR2;
400 ScaleRowDown34_1 = ScaleRowDown34_DSPR2;
401 } else {
402 ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2;
403 ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2;
404 }
405 }
406 #endif
407
408 for (y = 0; y < dst_height - 2; y += 3) {
409 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
410 src_ptr += src_stride;
411 dst_ptr += dst_stride;
412 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
413 src_ptr += src_stride;
414 dst_ptr += dst_stride;
415 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
416 src_ptr += src_stride * 2;
417 dst_ptr += dst_stride;
418 }
419
420 // Remainder 1 or 2 rows with last row vertically unfiltered
421 if ((dst_height % 3) == 2) {
422 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
423 src_ptr += src_stride;
424 dst_ptr += dst_stride;
425 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
426 } else if ((dst_height % 3) == 1) {
427 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
428 }
429 }
430
// 16-bit-per-sample variant of the 3/4 plane scaler.
//
// Every group of 4 source rows yields 3 destination rows. Rows 0 and 1 of a
// group are filtered downward with filter_stride; the third output row starts
// on source row 3 and filters upward through a negated stride.
// filter_stride is 0 for kFilterLinear. dst_width must be a multiple of 3.
// src_width and src_height are not used.
static void ScalePlaneDown34_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16* src_ptr,
                                uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown34_0_Box_16_C : ScaleRowDown34_16_C;
  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown34_1_Box_16_C : ScaleRowDown34_16_C;
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);

#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    ScaleRowDown34_0 =
        filtering ? ScaleRowDown34_0_Box_16_NEON : ScaleRowDown34_16_NEON;
    ScaleRowDown34_1 =
        filtering ? ScaleRowDown34_1_Box_16_NEON : ScaleRowDown34_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    ScaleRowDown34_0 =
        filtering ? ScaleRowDown34_0_Box_16_SSSE3 : ScaleRowDown34_16_SSSE3;
    ScaleRowDown34_1 =
        filtering ? ScaleRowDown34_1_Box_16_SSSE3 : ScaleRowDown34_16_SSSE3;
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown34_0 =
        filtering ? ScaleRowDown34_0_Box_16_DSPR2 : ScaleRowDown34_16_DSPR2;
    ScaleRowDown34_1 =
        filtering ? ScaleRowDown34_1_Box_16_DSPR2 : ScaleRowDown34_16_DSPR2;
  }
#endif

  // Whole groups: 4 source rows in, 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    ScaleRowDown34_1(src_ptr + src_stride, filter_stride,
                     dst_ptr + dst_stride, dst_width);
    ScaleRowDown34_0(src_ptr + src_stride * 3, -filter_stride,
                     dst_ptr + dst_stride * 2, dst_width);
    src_ptr += src_stride * 4;
    dst_ptr += dst_stride * 3;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  switch (dst_height % 3) {
    case 2:
      ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
      ScaleRowDown34_1(src_ptr + src_stride, 0, dst_ptr + dst_stride,
                       dst_width);
      break;
    case 1:
      ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
      break;
    default:
      break;
  }
}
514
515 // Scale plane, 3/8
516 // This is an optimized version for scaling down a plane to 3/8
517 // of its original size.
518 //
519 // Uses box filter arranges like this
520 // aaabbbcc -> abc
521 // aaabbbcc def
522 // aaabbbcc ghi
523 // dddeeeff
524 // dddeeeff
525 // dddeeeff
526 // ggghhhii
527 // ggghhhii
528 // Boxes are 3x3, 2x3, 3x2 and 2x2
529
ScalePlaneDown38(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr,enum FilterMode filtering)530 static void ScalePlaneDown38(int src_width,
531 int src_height,
532 int dst_width,
533 int dst_height,
534 int src_stride,
535 int dst_stride,
536 const uint8* src_ptr,
537 uint8* dst_ptr,
538 enum FilterMode filtering) {
539 int y;
540 void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
541 uint8* dst_ptr, int dst_width);
542 void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
543 uint8* dst_ptr, int dst_width);
544 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
545 assert(dst_width % 3 == 0);
546 (void)src_width;
547 (void)src_height;
548 if (!filtering) {
549 ScaleRowDown38_3 = ScaleRowDown38_C;
550 ScaleRowDown38_2 = ScaleRowDown38_C;
551 } else {
552 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
553 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
554 }
555
556 #if defined(HAS_SCALEROWDOWN38_NEON)
557 if (TestCpuFlag(kCpuHasNEON)) {
558 if (!filtering) {
559 ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
560 ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
561 } else {
562 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
563 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
564 }
565 if (dst_width % 12 == 0) {
566 if (!filtering) {
567 ScaleRowDown38_3 = ScaleRowDown38_NEON;
568 ScaleRowDown38_2 = ScaleRowDown38_NEON;
569 } else {
570 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
571 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
572 }
573 }
574 }
575 #endif
576 #if defined(HAS_SCALEROWDOWN38_SSSE3)
577 if (TestCpuFlag(kCpuHasSSSE3)) {
578 if (!filtering) {
579 ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
580 ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
581 } else {
582 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
583 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
584 }
585 if (dst_width % 12 == 0 && !filtering) {
586 ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
587 ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
588 }
589 if (dst_width % 6 == 0 && filtering) {
590 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
591 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
592 }
593 }
594 #endif
595 #if defined(HAS_SCALEROWDOWN38_DSPR2)
596 if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
597 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
598 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
599 if (!filtering) {
600 ScaleRowDown38_3 = ScaleRowDown38_DSPR2;
601 ScaleRowDown38_2 = ScaleRowDown38_DSPR2;
602 } else {
603 ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2;
604 ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2;
605 }
606 }
607 #endif
608 #if defined(HAS_SCALEROWDOWN38_MSA)
609 if (TestCpuFlag(kCpuHasMSA)) {
610 if (!filtering) {
611 ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
612 ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
613 } else {
614 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
615 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
616 }
617 if (dst_width % 12 == 0) {
618 if (!filtering) {
619 ScaleRowDown38_3 = ScaleRowDown38_MSA;
620 ScaleRowDown38_2 = ScaleRowDown38_MSA;
621 } else {
622 ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
623 ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
624 }
625 }
626 }
627 #endif
628
629 for (y = 0; y < dst_height - 2; y += 3) {
630 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
631 src_ptr += src_stride * 3;
632 dst_ptr += dst_stride;
633 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
634 src_ptr += src_stride * 3;
635 dst_ptr += dst_stride;
636 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
637 src_ptr += src_stride * 2;
638 dst_ptr += dst_stride;
639 }
640
641 // Remainder 1 or 2 rows with last row vertically unfiltered
642 if ((dst_height % 3) == 2) {
643 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
644 src_ptr += src_stride * 3;
645 dst_ptr += dst_stride;
646 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
647 } else if ((dst_height % 3) == 1) {
648 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
649 }
650 }
651
// 16-bit-per-sample variant of the 3/8 plane scaler.
//
// Every group of 8 source rows yields 3 destination rows: two rows from
// 3-row boxes (ScaleRowDown38_3) and one from a 2-row box (ScaleRowDown38_2).
// filter_stride is 0 for kFilterLinear. dst_width must be a multiple of 3.
// src_width and src_height are not used.
static void ScalePlaneDown38_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16* src_ptr,
                                uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown38_3_Box_16_C : ScaleRowDown38_16_C;
  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown38_2_Box_16_C : ScaleRowDown38_16_C;
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);

#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    ScaleRowDown38_3 =
        filtering ? ScaleRowDown38_3_Box_16_NEON : ScaleRowDown38_16_NEON;
    ScaleRowDown38_2 =
        filtering ? ScaleRowDown38_2_Box_16_NEON : ScaleRowDown38_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    ScaleRowDown38_3 =
        filtering ? ScaleRowDown38_3_Box_16_SSSE3 : ScaleRowDown38_16_SSSE3;
    ScaleRowDown38_2 =
        filtering ? ScaleRowDown38_2_Box_16_SSSE3 : ScaleRowDown38_16_SSSE3;
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown38_3 =
        filtering ? ScaleRowDown38_3_Box_16_DSPR2 : ScaleRowDown38_16_DSPR2;
    ScaleRowDown38_2 =
        filtering ? ScaleRowDown38_2_Box_16_DSPR2 : ScaleRowDown38_16_DSPR2;
  }
#endif

  // Whole groups: 8 source rows (3 + 3 + 2) in, 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    ScaleRowDown38_3(src_ptr + src_stride * 3, filter_stride,
                     dst_ptr + dst_stride, dst_width);
    ScaleRowDown38_2(src_ptr + src_stride * 6, filter_stride,
                     dst_ptr + dst_stride * 2, dst_width);
    src_ptr += src_stride * 8;
    dst_ptr += dst_stride * 3;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  switch (dst_height % 3) {
    case 2:
      ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
      ScaleRowDown38_3(src_ptr + src_stride * 3, 0, dst_ptr + dst_stride,
                       dst_width);
      break;
    case 1:
      ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
      break;
    default:
      break;
  }
}
735
// Clamps x to a minimum of 1. x is evaluated twice, so it must be free of
// side effects.
#define MIN1(x) ((x) >= 1 ? (x) : 1)
737
SumPixels(int iboxwidth,const uint16 * src_ptr)738 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
739 uint32 sum = 0u;
740 int x;
741 assert(iboxwidth > 0);
742 for (x = 0; x < iboxwidth; ++x) {
743 sum += src_ptr[x];
744 }
745 return sum;
746 }
747
// Returns the sum of the first iboxwidth entries of src_ptr (uint32 input).
// iboxwidth must be positive (asserted in debug builds).
static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
  uint32 total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
757
// Column pass of the box filter for a fractional horizontal step.
//
// x and dx are 16.16 fixed point. For each output pixel the box spans the
// source columns between the integer parts of x before and after stepping by
// dx (at least one column). The box sum is multiplied by a 16.16 reciprocal
// of (box width * boxheight) taken from a two-entry table, which covers box
// widths of minboxwidth and minboxwidth + 1.
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16* src_ptr,
                            uint8* dst_ptr) {
  const int minboxwidth = dx >> 16;
  int scaletbl[2];
  int i;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    const int left = x >> 16;
    int boxwidth;
    uint32 sum;
    x += dx;
    boxwidth = MIN1((x >> 16) - left);
    sum = SumPixels(boxwidth, src_ptr + left);
    dst_ptr[i] = sum * scaletbl[boxwidth - minboxwidth] >> 16;
  }
}
779
// Column pass of the 16-bit box filter for a fractional horizontal step.
//
// x and dx are 16.16 fixed point. For each output pixel the box spans the
// source columns between the integer parts of x before and after stepping by
// dx (at least one column). The box sum is multiplied by a 16.16 reciprocal
// of (box width * boxheight) taken from a two-entry table, which covers box
// widths of minboxwidth and minboxwidth + 1.
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32* src_ptr,
                               uint16* dst_ptr) {
  const int minboxwidth = dx >> 16;
  int scaletbl[2];
  int i;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    const int left = x >> 16;
    int boxwidth;
    uint32 sum;
    x += dx;
    boxwidth = MIN1((x >> 16) - left);
    sum = SumPixels_16(boxwidth, src_ptr + left);
    dst_ptr[i] = sum * scaletbl[boxwidth - minboxwidth] >> 16;
  }
}
801
// Column pass of the box filter when each output pixel maps to exactly one
// source column: output i is src_ptr[(x >> 16) + i] multiplied by the 16.16
// reciprocal of boxheight. src_ptr is expected to hold per-column sums over
// boxheight rows.
// dx is accepted only so the signature matches the other ScaleAddCols
// functions; it is not used. The parameter is named (rather than left
// anonymous) so the definition is also valid C.
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16* src_ptr,
                            uint8* dst_ptr) {
  int scaleval = 65536 / boxheight;
  int i;
  (void)dx;
  src_ptr += (x >> 16);  // x is 16.16 fixed point; keep the integer column.
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = src_ptr[i] * scaleval >> 16;
  }
}
815
// Column pass of the box filter for a whole-number horizontal step.
//
// Every box is boxwidth = max(1, dx >> 16) columns wide, starting at the
// integer part of x (16.16 fixed point) and advancing by boxwidth per output
// pixel. Each box sum is multiplied by the 16.16 reciprocal of
// (boxwidth * boxheight).
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16* src_ptr,
                            uint8* dst_ptr) {
  const int boxwidth = MIN1(dx >> 16);
  const int scaleval = 65536 / (boxwidth * boxheight);
  const int first = x >> 16;
  int i;
  for (i = 0; i < dst_width; ++i) {
    dst_ptr[i] =
        SumPixels(boxwidth, src_ptr + first + i * boxwidth) * scaleval >> 16;
  }
}
831
// Column pass of the 16-bit box filter for a whole-number horizontal step.
//
// Every box is boxwidth = max(1, dx >> 16) columns wide, starting at the
// integer part of x and advancing by boxwidth per output pixel. Each box sum
// is multiplied by the 16.16 reciprocal of (boxwidth * boxheight).
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32* src_ptr,
                               uint16* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  // x arrives as 16.16 fixed point; convert it to a column index before it is
  // used to offset src_ptr, as ScaleAddCols1_C and ScaleAddCols2_16_C do.
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
846
847 // Scale plane down to any dimensions, with interpolation.
848 // (boxfilter).
849 //
850 // Same method as SimpleScale, which is fixed point, outputting
851 // one pixel of destination using fixed point (16.16) to step
852 // through source, sampling a box of pixel with simple
853 // averaging.
ScalePlaneBox(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr)854 static void ScalePlaneBox(int src_width,
855 int src_height,
856 int dst_width,
857 int dst_height,
858 int src_stride,
859 int dst_stride,
860 const uint8* src_ptr,
861 uint8* dst_ptr) {
862 int j, k;
863 // Initial source x/y coordinate and step values as 16.16 fixed point.
864 int x = 0;
865 int y = 0;
866 int dx = 0;
867 int dy = 0;
868 const int max_y = (src_height << 16);
869 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
870 &dx, &dy);
871 src_width = Abs(src_width);
872 {
873 // Allocate a row buffer of uint16.
874 align_buffer_64(row16, src_width * 2);
875 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
876 const uint16* src_ptr, uint8* dst_ptr) =
877 (dx & 0xffff) ? ScaleAddCols2_C
878 : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
879 void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
880 ScaleAddRow_C;
881 #if defined(HAS_SCALEADDROW_SSE2)
882 if (TestCpuFlag(kCpuHasSSE2)) {
883 ScaleAddRow = ScaleAddRow_Any_SSE2;
884 if (IS_ALIGNED(src_width, 16)) {
885 ScaleAddRow = ScaleAddRow_SSE2;
886 }
887 }
888 #endif
889 #if defined(HAS_SCALEADDROW_AVX2)
890 if (TestCpuFlag(kCpuHasAVX2)) {
891 ScaleAddRow = ScaleAddRow_Any_AVX2;
892 if (IS_ALIGNED(src_width, 32)) {
893 ScaleAddRow = ScaleAddRow_AVX2;
894 }
895 }
896 #endif
897 #if defined(HAS_SCALEADDROW_NEON)
898 if (TestCpuFlag(kCpuHasNEON)) {
899 ScaleAddRow = ScaleAddRow_Any_NEON;
900 if (IS_ALIGNED(src_width, 16)) {
901 ScaleAddRow = ScaleAddRow_NEON;
902 }
903 }
904 #endif
905 #if defined(HAS_SCALEADDROW_MSA)
906 if (TestCpuFlag(kCpuHasMSA)) {
907 ScaleAddRow = ScaleAddRow_Any_MSA;
908 if (IS_ALIGNED(src_width, 16)) {
909 ScaleAddRow = ScaleAddRow_MSA;
910 }
911 }
912 #endif
913 #if defined(HAS_SCALEADDROW_DSPR2)
914 if (TestCpuFlag(kCpuHasDSPR2)) {
915 ScaleAddRow = ScaleAddRow_Any_DSPR2;
916 if (IS_ALIGNED(src_width, 16)) {
917 ScaleAddRow = ScaleAddRow_DSPR2;
918 }
919 }
920 #endif
921
922 for (j = 0; j < dst_height; ++j) {
923 int boxheight;
924 int iy = y >> 16;
925 const uint8* src = src_ptr + iy * src_stride;
926 y += dy;
927 if (y > max_y) {
928 y = max_y;
929 }
930 boxheight = MIN1((y >> 16) - iy);
931 memset(row16, 0, src_width * 2);
932 for (k = 0; k < boxheight; ++k) {
933 ScaleAddRow(src, (uint16*)(row16), src_width);
934 src += src_stride;
935 }
936 ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
937 dst_ptr += dst_stride;
938 }
939 free_aligned_buffer_64(row16);
940 }
941 }
942
// 16-bit-per-sample variant of the box-filter plane scaler.
//
// The source is stepped through in 16.16 fixed point. For every destination
// row, the source rows covered by the box are accumulated by ScaleAddRow into
// a zeroed uint32 row buffer, then ScaleAddCols reduces that buffer to
// dst_width output pixels.
static void ScalePlaneBox_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint16* src_ptr,
                             uint16* dst_ptr) {
  int out_row;
  int box_row;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    uint32* const sums = (uint32*)(row32);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                         const uint32* src_ptr, uint16* dst_ptr);
    void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
        ScaleAddRow_16_C;

    // Fractional horizontal steps need the variable-width column pass.
    if (dx & 0xffff) {
      ScaleAddCols = ScaleAddCols2_16_C;
    } else {
      ScaleAddCols = ScaleAddCols1_16_C;
    }

#if defined(HAS_SCALEADDROW_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_16_SSE2;
    }
#endif

    for (out_row = 0; out_row < dst_height; ++out_row) {
      const int iy = y >> 16;
      int boxheight;
      // Step to the bottom edge of this box, clamped to the source height.
      y += dy;
      y = (y > max_y) ? max_y : y;
      boxheight = MIN1((y >> 16) - iy);
      memset(row32, 0, src_width * 4);
      for (box_row = 0; box_row < boxheight; ++box_row) {
        ScaleAddRow(src_ptr + (iy + box_row) * src_stride, sums, src_width);
      }
      ScaleAddCols(dst_width, boxheight, x, dx, sums, dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
996
997 // Scale plane down with bilinear interpolation.
// For every destination row, the source row addressed by y (16.16 fixed
// point) is selected. With kFilterLinear that row is filtered horizontally
// straight into the destination. Otherwise InterpolateRow is first given the
// row, the source stride and the 8-bit vertical fraction and writes a
// temporary row, which is then filtered horizontally into the destination.
//
// src_width may be negative. It is passed to ScaleSlope unchanged, but every
// buffer size and width test in this function uses its absolute value.
void ScalePlaneBilinearDown(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8* src_ptr,
                            uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer. It is sized with Abs() because src_width can still
  // be negative at this point, while InterpolateRow below is asked to produce
  // Abs(src_width) bytes into it.
  align_buffer_64(row, Abs(src_width));

  // y is clamped to the start of the last source row.
  const int max_y = (src_height - 1) << 16;
  int j;
  // The 64 bit column filter is chosen by magnitude of the width, matching
  // the test ScalePlaneBilinearUp performs after taking Abs(src_width).
  void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width,
                          int x, int dx) =
      (Abs(src_width) >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Pick the row interpolator. It processes src_width bytes per call, so the
  // alignment tests are on src_width.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = InterpolateRow_Any_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_DSPR2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  // Pick the column filter. The SIMD versions are only used for widths below
  // 32768.
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal filtering only; the temporary row is not used.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Bits 8..15 of y are the vertical fraction handed to InterpolateRow.
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
1104
// 16-bit variant of bilinear down scaling.
// For every destination row, the source row addressed by y (16.16 fixed
// point) is selected. With kFilterLinear that row is filtered horizontally
// straight into the destination. Otherwise InterpolateRow is first given the
// row, the source stride and the 8-bit vertical fraction and writes a
// temporary row, which is then filtered horizontally into the destination.
//
// src_width may be negative. It is passed to ScaleSlope unchanged, but every
// buffer size and width test in this function uses its absolute value.
void ScalePlaneBilinearDown_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16* src_ptr,
                               uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer of 2 bytes per sample. It is sized with Abs()
  // because src_width can still be negative at this point, while
  // InterpolateRow below is asked to produce Abs(src_width) samples into it.
  align_buffer_64(row, Abs(src_width) * 2);

  // y is clamped to the start of the last source row.
  const int max_y = (src_height - 1) << 16;
  int j;
  // The 64 bit column filter is chosen by magnitude of the width, matching
  // the test ScalePlaneBilinearUp_16 performs after taking Abs(src_width).
  void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr,
                          int dst_width, int x, int dx) =
      (Abs(src_width) >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Pick the row interpolator. It processes src_width samples per call, so
  // the alignment tests are on src_width.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_DSPR2;
    }
  }
#endif

  // The SIMD column filter is only used for widths below 32768.
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal filtering only; the temporary row is not used.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Bits 8..15 of y are the vertical fraction handed to InterpolateRow.
      int yf = (y >> 8) & 255;
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
1203
// Scale plane up with bilinear interpolation.
// Two horizontally scaled source rows are kept in a temporary buffer. Each
// destination row is produced by InterpolateRow from those two rows; whenever
// the integer part of y moves to a new source row, one more source row is
// scaled horizontally into the buffer and the roles of the two rows swap.
// src_width may be negative: it is passed to ScaleSlope unchanged and replaced
// by its absolute value afterwards.
void ScalePlaneBilinearUp(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8* src_ptr,
                          uint8* dst_ptr,
                          enum FilterMode filtering) {
  int out_row;
  // Source position and step as 16.16 fixed point; filled in by ScaleSlope.
  int x = 0;
  int dx = 0;
  int y = 0;
  int dy = 0;
  // Start of the last source row, used as the upper clamp for y.
  const int max_y = (src_height - 1) << 16;
  void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width,
                          int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Row interpolator: it works on dst_width bytes, so alignment is tested on
  // dst_width.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = IS_ALIGNED(dst_width, 16) ? InterpolateRow_SSSE3
                                               : InterpolateRow_Any_SSSE3;
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = IS_ALIGNED(dst_width, 32) ? InterpolateRow_AVX2
                                               : InterpolateRow_Any_AVX2;
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = IS_ALIGNED(dst_width, 16) ? InterpolateRow_NEON
                                               : InterpolateRow_Any_NEON;
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = IS_ALIGNED(dst_width, 4) ? InterpolateRow_DSPR2
                                              : InterpolateRow_Any_DSPR2;
  }
#endif

  // Column scaler.
  if (filtering) {
    // Filtered: 64 bit C version for wide sources, SIMD only below 32768.
    if (src_width >= 32768) {
      ScaleFilterCols = ScaleFilterCols64_C;
    }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
      ScaleFilterCols = ScaleFilterCols_SSSE3;
    }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
    if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
      ScaleFilterCols = IS_ALIGNED(dst_width, 8) ? ScaleFilterCols_NEON
                                                 : ScaleFilterCols_Any_NEON;
    }
#endif
  } else if (src_width * 2 == dst_width && x < 0x8000) {
    // Unfiltered, exactly twice as wide, starting below half a pixel.
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int src_index = y >> 16;
    const uint8* next_src = src_ptr + src_index * src_stride;

    // Room for two rows, each padded to a multiple of 32 bytes.
    const int row_bytes = (dst_width + 31) & ~31;
    align_buffer_64(row, row_bytes * 2);

    // cur_row is the first of the two rows given to InterpolateRow and
    // other_offset is the signed distance from it to the second one.
    uint8* cur_row = row;
    int other_offset = row_bytes;
    // Source row index at the time the buffer was last refreshed.
    int cached_index = src_index;

    // Prime both buffered rows. With a single source row, the same source row
    // fills both.
    ScaleFilterCols(cur_row, next_src, dst_width, x, dx);
    if (src_height > 1) {
      next_src += src_stride;
    }
    ScaleFilterCols(cur_row + other_offset, next_src, dst_width, x, dx);
    next_src += src_stride;

    for (out_row = 0; out_row < dst_height; ++out_row) {
      src_index = y >> 16;
      if (src_index != cached_index) {
        if (y > max_y) {
          // Clamp to the last source row and re-derive the source pointer.
          y = max_y;
          src_index = y >> 16;
          next_src = src_ptr + src_index * src_stride;
        }
        if (src_index != cached_index) {
          // Scale one more source row over cur_row, then swap the roles of
          // the two buffered rows.
          ScaleFilterCols(cur_row, next_src, dst_width, x, dx);
          cur_row += other_offset;
          other_offset = -other_offset;
          cached_index = src_index;
          next_src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Stride 0 and fraction 0: only cur_row is passed on.
        InterpolateRow(dst_ptr, cur_row, 0, dst_width, 0);
      } else {
        // Bits 8..15 of y are the vertical fraction.
        InterpolateRow(dst_ptr, cur_row, other_offset, dst_width,
                       (y >> 8) & 255);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1339
// 16-bit variant of bilinear up scaling.
// Two horizontally scaled source rows are kept in a temporary buffer. Each
// destination row is produced by InterpolateRow from those two rows; whenever
// the integer part of y moves to a new source row, one more source row is
// scaled horizontally into the buffer and the roles of the two rows swap.
// src_width may be negative: it is passed to ScaleSlope unchanged and replaced
// by its absolute value afterwards.
void ScalePlaneBilinearUp_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint16* src_ptr,
                             uint16* dst_ptr,
                             enum FilterMode filtering) {
  int out_row;
  // Source position and step as 16.16 fixed point; filled in by ScaleSlope.
  int x = 0;
  int dx = 0;
  int y = 0;
  int dy = 0;
  // Start of the last source row, used as the upper clamp for y.
  const int max_y = (src_height - 1) << 16;
  void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Row interpolator: it works on dst_width samples, so alignment is tested
  // on dst_width.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = IS_ALIGNED(dst_width, 16) ? InterpolateRow_16_SSE2
                                               : InterpolateRow_Any_16_SSE2;
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = IS_ALIGNED(dst_width, 16) ? InterpolateRow_16_SSSE3
                                               : InterpolateRow_Any_16_SSSE3;
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = IS_ALIGNED(dst_width, 32) ? InterpolateRow_16_AVX2
                                               : InterpolateRow_Any_16_AVX2;
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = IS_ALIGNED(dst_width, 16) ? InterpolateRow_16_NEON
                                               : InterpolateRow_Any_16_NEON;
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = IS_ALIGNED(dst_width, 4) ? InterpolateRow_16_DSPR2
                                              : InterpolateRow_Any_16_DSPR2;
  }
#endif

  // Column scaler.
  if (filtering) {
    // Filtered: 64 bit C version for wide sources, SIMD only below 32768.
    if (src_width >= 32768) {
      ScaleFilterCols = ScaleFilterCols64_16_C;
    }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
      ScaleFilterCols = ScaleFilterCols_16_SSSE3;
    }
#endif
  } else if (src_width * 2 == dst_width && x < 0x8000) {
    // Unfiltered, exactly twice as wide, starting below half a pixel.
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int src_index = y >> 16;
    const uint16* next_src = src_ptr + src_index * src_stride;

    // Room for two rows of uint16, each padded to a multiple of 32 samples
    // (2 rows * 2 bytes per sample = 4 bytes per padded column).
    const int row_samples = (dst_width + 31) & ~31;
    align_buffer_64(row, row_samples * 4);

    // cur_row is the first of the two rows given to InterpolateRow and
    // other_offset is the signed distance (in samples) to the second one.
    uint16* cur_row = (uint16*)row;
    int other_offset = row_samples;
    // Source row index at the time the buffer was last refreshed.
    int cached_index = src_index;

    // Prime both buffered rows. With a single source row, the same source row
    // fills both.
    ScaleFilterCols(cur_row, next_src, dst_width, x, dx);
    if (src_height > 1) {
      next_src += src_stride;
    }
    ScaleFilterCols(cur_row + other_offset, next_src, dst_width, x, dx);
    next_src += src_stride;

    for (out_row = 0; out_row < dst_height; ++out_row) {
      src_index = y >> 16;
      if (src_index != cached_index) {
        if (y > max_y) {
          // Clamp to the last source row and re-derive the source pointer.
          y = max_y;
          src_index = y >> 16;
          next_src = src_ptr + src_index * src_stride;
        }
        if (src_index != cached_index) {
          // Scale one more source row over cur_row, then swap the roles of
          // the two buffered rows.
          ScaleFilterCols(cur_row, next_src, dst_width, x, dx);
          cur_row += other_offset;
          other_offset = -other_offset;
          cached_index = src_index;
          next_src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Stride 0 and fraction 0: only cur_row is passed on.
        InterpolateRow(dst_ptr, cur_row, 0, dst_width, 0);
      } else {
        // Bits 8..15 of y are the vertical fraction.
        InterpolateRow(dst_ptr, cur_row, other_offset, dst_width,
                       (y >> 8) & 255);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1474
1475 // Scale Plane to/from any dimensions, without interpolation.
1476 // Fixed point math is used for performance: The upper 16 bits
1477 // of x and dx is the integer part of the source position and
1478 // the lower 16 bits are the fixed decimal part.
1479
// Unfiltered scale: each destination row is produced by ScaleCols from the
// single source row addressed by the integer part of y.
// src_width may be negative: it is passed to ScaleSlope unchanged and replaced
// by its absolute value afterwards.
static void ScalePlaneSimple(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8* src_ptr,
                             uint8* dst_ptr) {
  int rows_left;
  // Source position and step as 16.16 fixed point; filled in by ScaleSlope.
  int x = 0;
  int dx = 0;
  int y = 0;
  int dy = 0;
  void (*ScaleCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, int x,
                    int dx) = ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Exactly twice as wide and starting below half a pixel: use the dedicated
  // 2x column routine.
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  for (rows_left = dst_height; rows_left > 0; --rows_left) {
    const uint8* src_row = src_ptr + (y >> 16) * src_stride;
    ScaleCols(dst_ptr, src_row, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}
1515
// 16-bit unfiltered scale: each destination row is produced by ScaleCols from
// the single source row addressed by the integer part of y.
// src_width may be negative: it is passed to ScaleSlope unchanged and replaced
// by its absolute value afterwards.
static void ScalePlaneSimple_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16* src_ptr,
                                uint16* dst_ptr) {
  int rows_left;
  // Source position and step as 16.16 fixed point; filled in by ScaleSlope.
  int x = 0;
  int dx = 0;
  int y = 0;
  int dy = 0;
  void (*ScaleCols)(uint16 * dst_ptr, const uint16* src_ptr, int dst_width,
                    int x, int dx) = ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Exactly twice as wide and starting below half a pixel: use the dedicated
  // 2x column routine.
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  for (rows_left = dst_height; rows_left > 0; --rows_left) {
    const uint16* src_row = src_ptr + (y >> 16) * src_stride;
    ScaleCols(dst_ptr, src_row, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}
1551
1552 // Scale a plane.
1553 // This function dispatches to a specialized scaler based on scale factor.
1554
1555 LIBYUV_API
ScalePlane(const uint8 * src,int src_stride,int src_width,int src_height,uint8 * dst,int dst_stride,int dst_width,int dst_height,enum FilterMode filtering)1556 void ScalePlane(const uint8* src,
1557 int src_stride,
1558 int src_width,
1559 int src_height,
1560 uint8* dst,
1561 int dst_stride,
1562 int dst_width,
1563 int dst_height,
1564 enum FilterMode filtering) {
1565 // Simplify filtering when possible.
1566 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1567 filtering);
1568
1569 // Negative height means invert the image.
1570 if (src_height < 0) {
1571 src_height = -src_height;
1572 src = src + (src_height - 1) * src_stride;
1573 src_stride = -src_stride;
1574 }
1575
1576 // Use specialized scales to improve performance for common resolutions.
1577 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1578 if (dst_width == src_width && dst_height == src_height) {
1579 // Straight copy.
1580 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1581 return;
1582 }
1583 if (dst_width == src_width && filtering != kFilterBox) {
1584 int dy = FixedDiv(src_height, dst_height);
1585 // Arbitrary scale vertically, but unscaled horizontally.
1586 ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
1587 dst_stride, src, dst, 0, 0, dy, 1, filtering);
1588 return;
1589 }
1590 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1591 // Scale down.
1592 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1593 // optimized, 3/4
1594 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
1595 dst_stride, src, dst, filtering);
1596 return;
1597 }
1598 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1599 // optimized, 1/2
1600 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
1601 dst_stride, src, dst, filtering);
1602 return;
1603 }
1604 // 3/8 rounded up for odd sized chroma height.
1605 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1606 // optimized, 3/8
1607 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
1608 dst_stride, src, dst, filtering);
1609 return;
1610 }
1611 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1612 (filtering == kFilterBox || filtering == kFilterNone)) {
1613 // optimized, 1/4
1614 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
1615 dst_stride, src, dst, filtering);
1616 return;
1617 }
1618 }
1619 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1620 ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
1621 dst_stride, src, dst);
1622 return;
1623 }
1624 if (filtering && dst_height > src_height) {
1625 ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
1626 src_stride, dst_stride, src, dst, filtering);
1627 return;
1628 }
1629 if (filtering) {
1630 ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
1631 src_stride, dst_stride, src, dst, filtering);
1632 return;
1633 }
1634 ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
1635 dst_stride, src, dst);
1636 }
1637
1638 LIBYUV_API
ScalePlane_16(const uint16 * src,int src_stride,int src_width,int src_height,uint16 * dst,int dst_stride,int dst_width,int dst_height,enum FilterMode filtering)1639 void ScalePlane_16(const uint16* src,
1640 int src_stride,
1641 int src_width,
1642 int src_height,
1643 uint16* dst,
1644 int dst_stride,
1645 int dst_width,
1646 int dst_height,
1647 enum FilterMode filtering) {
1648 // Simplify filtering when possible.
1649 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1650 filtering);
1651
1652 // Negative height means invert the image.
1653 if (src_height < 0) {
1654 src_height = -src_height;
1655 src = src + (src_height - 1) * src_stride;
1656 src_stride = -src_stride;
1657 }
1658
1659 // Use specialized scales to improve performance for common resolutions.
1660 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1661 if (dst_width == src_width && dst_height == src_height) {
1662 // Straight copy.
1663 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
1664 return;
1665 }
1666 if (dst_width == src_width) {
1667 int dy = FixedDiv(src_height, dst_height);
1668 // Arbitrary scale vertically, but unscaled vertically.
1669 ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
1670 dst_stride, src, dst, 0, 0, dy, 1, filtering);
1671 return;
1672 }
1673 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1674 // Scale down.
1675 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1676 // optimized, 3/4
1677 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
1678 src_stride, dst_stride, src, dst, filtering);
1679 return;
1680 }
1681 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1682 // optimized, 1/2
1683 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
1684 src_stride, dst_stride, src, dst, filtering);
1685 return;
1686 }
1687 // 3/8 rounded up for odd sized chroma height.
1688 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1689 // optimized, 3/8
1690 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
1691 src_stride, dst_stride, src, dst, filtering);
1692 return;
1693 }
1694 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1695 filtering != kFilterBilinear) {
1696 // optimized, 1/4
1697 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
1698 src_stride, dst_stride, src, dst, filtering);
1699 return;
1700 }
1701 }
1702 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1703 ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
1704 dst_stride, src, dst);
1705 return;
1706 }
1707 if (filtering && dst_height > src_height) {
1708 ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
1709 src_stride, dst_stride, src, dst, filtering);
1710 return;
1711 }
1712 if (filtering) {
1713 ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
1714 src_stride, dst_stride, src, dst, filtering);
1715 return;
1716 }
1717 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
1718 dst_stride, src, dst);
1719 }
1720
1721 // Scale an I420 image.
1722 // This function in turn calls a scaling function for each plane.
1723
1724 LIBYUV_API
I420Scale(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,int src_width,int src_height,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1725 int I420Scale(const uint8* src_y,
1726 int src_stride_y,
1727 const uint8* src_u,
1728 int src_stride_u,
1729 const uint8* src_v,
1730 int src_stride_v,
1731 int src_width,
1732 int src_height,
1733 uint8* dst_y,
1734 int dst_stride_y,
1735 uint8* dst_u,
1736 int dst_stride_u,
1737 uint8* dst_v,
1738 int dst_stride_v,
1739 int dst_width,
1740 int dst_height,
1741 enum FilterMode filtering) {
1742 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1743 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1744 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1745 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1746 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1747 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1748 dst_width <= 0 || dst_height <= 0) {
1749 return -1;
1750 }
1751
1752 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1753 dst_width, dst_height, filtering);
1754 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1755 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1756 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1757 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1758 return 0;
1759 }
1760
1761 LIBYUV_API
I420Scale_16(const uint16 * src_y,int src_stride_y,const uint16 * src_u,int src_stride_u,const uint16 * src_v,int src_stride_v,int src_width,int src_height,uint16 * dst_y,int dst_stride_y,uint16 * dst_u,int dst_stride_u,uint16 * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1762 int I420Scale_16(const uint16* src_y,
1763 int src_stride_y,
1764 const uint16* src_u,
1765 int src_stride_u,
1766 const uint16* src_v,
1767 int src_stride_v,
1768 int src_width,
1769 int src_height,
1770 uint16* dst_y,
1771 int dst_stride_y,
1772 uint16* dst_u,
1773 int dst_stride_u,
1774 uint16* dst_v,
1775 int dst_stride_v,
1776 int dst_width,
1777 int dst_height,
1778 enum FilterMode filtering) {
1779 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1780 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1781 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1782 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1783 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1784 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1785 dst_width <= 0 || dst_height <= 0) {
1786 return -1;
1787 }
1788
1789 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1790 dst_width, dst_height, filtering);
1791 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1792 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1793 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1794 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1795 return 0;
1796 }
1797
1798 // Deprecated api
1799 LIBYUV_API
Scale(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,int src_stride_y,int src_stride_u,int src_stride_v,int src_width,int src_height,uint8 * dst_y,uint8 * dst_u,uint8 * dst_v,int dst_stride_y,int dst_stride_u,int dst_stride_v,int dst_width,int dst_height,LIBYUV_BOOL interpolate)1800 int Scale(const uint8* src_y,
1801 const uint8* src_u,
1802 const uint8* src_v,
1803 int src_stride_y,
1804 int src_stride_u,
1805 int src_stride_v,
1806 int src_width,
1807 int src_height,
1808 uint8* dst_y,
1809 uint8* dst_u,
1810 uint8* dst_v,
1811 int dst_stride_y,
1812 int dst_stride_u,
1813 int dst_stride_v,
1814 int dst_width,
1815 int dst_height,
1816 LIBYUV_BOOL interpolate) {
1817 return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
1818 src_stride_v, src_width, src_height, dst_y, dst_stride_y,
1819 dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
1820 dst_height, interpolate ? kFilterBox : kFilterNone);
1821 }
1822
1823 // Deprecated api
1824 LIBYUV_API
ScaleOffset(const uint8 * src,int src_width,int src_height,uint8 * dst,int dst_width,int dst_height,int dst_yoffset,LIBYUV_BOOL interpolate)1825 int ScaleOffset(const uint8* src,
1826 int src_width,
1827 int src_height,
1828 uint8* dst,
1829 int dst_width,
1830 int dst_height,
1831 int dst_yoffset,
1832 LIBYUV_BOOL interpolate) {
1833 // Chroma requires offset to multiple of 2.
1834 int dst_yoffset_even = dst_yoffset & ~1;
1835 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1836 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1837 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1838 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1839 int aheight = dst_height - dst_yoffset_even * 2; // actual output height
1840 const uint8* src_y = src;
1841 const uint8* src_u = src + src_width * src_height;
1842 const uint8* src_v =
1843 src + src_width * src_height + src_halfwidth * src_halfheight;
1844 uint8* dst_y = dst + dst_yoffset_even * dst_width;
1845 uint8* dst_u =
1846 dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth;
1847 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
1848 (dst_yoffset_even >> 1) * dst_halfwidth;
1849 if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 ||
1850 dst_height <= 0 || dst_yoffset_even < 0 ||
1851 dst_yoffset_even >= dst_height) {
1852 return -1;
1853 }
1854 return I420Scale(src_y, src_width, src_u, src_halfwidth, src_v, src_halfwidth,
1855 src_width, src_height, dst_y, dst_width, dst_u,
1856 dst_halfwidth, dst_v, dst_halfwidth, dst_width, aheight,
1857 interpolate ? kFilterBox : kFilterNone);
1858 }
1859
1860 #ifdef __cplusplus
1861 } // extern "C"
1862 } // namespace libyuv
1863 #endif
1864