1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include <time.h>
13 
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/convert_argb.h"
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/scale_argb.h"
18 #include "libyuv/video_common.h"
19 
20 namespace libyuv {
21 
// Builds a "file:line" string literal at preprocessing time.
// FILELINESTR hands its line argument to STRINGIZE instead of applying the
// # operator itself, so an argument such as __LINE__ is macro-expanded to its
// numeric value before it is converted to a string.
#define STRINGIZE(token) #token
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
24 
25 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
ARGBTestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)26 static int ARGBTestFilter(int src_width,
27                           int src_height,
28                           int dst_width,
29                           int dst_height,
30                           FilterMode f,
31                           int benchmark_iterations,
32                           int disable_cpu_flags,
33                           int benchmark_cpu_info) {
34   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
35     return 0;
36   }
37 
38   int i, j;
39   const int b = 0;  // 128 to test for padding/stride.
40   int64 src_argb_plane_size =
41       (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL;
42   int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
43 
44   align_buffer_page_end(src_argb, src_argb_plane_size);
45   if (!src_argb) {
46     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
47     return 0;
48   }
49   MemRandomize(src_argb, src_argb_plane_size);
50 
51   int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
52   int dst_stride_argb = (b * 2 + dst_width) * 4;
53 
54   align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
55   align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
56   if (!dst_argb_c || !dst_argb_opt) {
57     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
58     return 0;
59   }
60   memset(dst_argb_c, 2, dst_argb_plane_size);
61   memset(dst_argb_opt, 3, dst_argb_plane_size);
62 
63   // Warm up both versions for consistent benchmarks.
64   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
65   ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
66             src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
67             dst_stride_argb, dst_width, dst_height, f);
68   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
69   ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
70             src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4,
71             dst_stride_argb, dst_width, dst_height, f);
72 
73   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
74   double c_time = get_time();
75   ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
76             src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
77             dst_stride_argb, dst_width, dst_height, f);
78 
79   c_time = (get_time() - c_time);
80 
81   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
82   double opt_time = get_time();
83   for (i = 0; i < benchmark_iterations; ++i) {
84     ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
85               src_width, src_height,
86               dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
87               dst_width, dst_height, f);
88   }
89   opt_time = (get_time() - opt_time) / benchmark_iterations;
90 
91   // Report performance of C vs OPT
92   printf("filter %d - %8d us C - %8d us OPT\n", f,
93          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
94 
95   // C version may be a little off from the optimized. Order of
96   //  operations may introduce rounding somewhere. So do a difference
97   //  of the buffers and look to see that the max difference isn't
98   //  over 2.
99   int max_diff = 0;
100   for (i = b; i < (dst_height + b); ++i) {
101     for (j = b * 4; j < (dst_width + b) * 4; ++j) {
102       int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
103                          dst_argb_opt[(i * dst_stride_argb) + j]);
104       if (abs_diff > max_diff) {
105         max_diff = abs_diff;
106       }
107     }
108   }
109 
110   free_aligned_buffer_page_end(dst_argb_c);
111   free_aligned_buffer_page_end(dst_argb_opt);
112   free_aligned_buffer_page_end(src_argb);
113   return max_diff;
114 }
115 
116 static const int kTileX = 8;
117 static const int kTileY = 8;
118 
TileARGBScale(const uint8 * src_argb,int src_stride_argb,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,int dst_width,int dst_height,FilterMode filtering)119 static int TileARGBScale(const uint8* src_argb,
120                          int src_stride_argb,
121                          int src_width,
122                          int src_height,
123                          uint8* dst_argb,
124                          int dst_stride_argb,
125                          int dst_width,
126                          int dst_height,
127                          FilterMode filtering) {
128   for (int y = 0; y < dst_height; y += kTileY) {
129     for (int x = 0; x < dst_width; x += kTileX) {
130       int clip_width = kTileX;
131       if (x + clip_width > dst_width) {
132         clip_width = dst_width - x;
133       }
134       int clip_height = kTileY;
135       if (y + clip_height > dst_height) {
136         clip_height = dst_height - y;
137       }
138       int r = ARGBScaleClip(src_argb, src_stride_argb, src_width, src_height,
139                             dst_argb, dst_stride_argb, dst_width, dst_height, x,
140                             y, clip_width, clip_height, filtering);
141       if (r) {
142         return r;
143       }
144     }
145   }
146   return 0;
147 }
148 
ARGBClipTestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations)149 static int ARGBClipTestFilter(int src_width,
150                               int src_height,
151                               int dst_width,
152                               int dst_height,
153                               FilterMode f,
154                               int benchmark_iterations) {
155   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156     return 0;
157   }
158 
159   const int b = 128;
160   int64 src_argb_plane_size =
161       (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4;
162   int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
163 
164   align_buffer_page_end(src_argb, src_argb_plane_size);
165   if (!src_argb) {
166     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
167     return 0;
168   }
169   memset(src_argb, 1, src_argb_plane_size);
170 
171   int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
172   int dst_stride_argb = (b * 2 + dst_width) * 4;
173 
174   int i, j;
175   for (i = b; i < (Abs(src_height) + b); ++i) {
176     for (j = b; j < (Abs(src_width) + b) * 4; ++j) {
177       src_argb[(i * src_stride_argb) + j] = (fastrand() & 0xff);
178     }
179   }
180 
181   align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
182   align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
183   if (!dst_argb_c || !dst_argb_opt) {
184     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
185     return 0;
186   }
187   memset(dst_argb_c, 2, dst_argb_plane_size);
188   memset(dst_argb_opt, 3, dst_argb_plane_size);
189 
190   // Do full image, no clipping.
191   double c_time = get_time();
192   ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
193             src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
194             dst_stride_argb, dst_width, dst_height, f);
195   c_time = (get_time() - c_time);
196 
197   // Do tiled image, clipping scale to a tile at a time.
198   double opt_time = get_time();
199   for (i = 0; i < benchmark_iterations; ++i) {
200     TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
201                   src_width, src_height,
202                   dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
203                   dst_width, dst_height, f);
204   }
205   opt_time = (get_time() - opt_time) / benchmark_iterations;
206 
207   // Report performance of Full vs Tiled.
208   printf("filter %d - %8d us Full - %8d us Tiled\n", f,
209          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
210 
211   // Compare full scaled image vs tiled image.
212   int max_diff = 0;
213   for (i = b; i < (dst_height + b); ++i) {
214     for (j = b * 4; j < (dst_width + b) * 4; ++j) {
215       int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
216                          dst_argb_opt[(i * dst_stride_argb) + j]);
217       if (abs_diff > max_diff) {
218         max_diff = abs_diff;
219       }
220     }
221   }
222 
223   free_aligned_buffer_page_end(dst_argb_c);
224   free_aligned_buffer_page_end(dst_argb_opt);
225   free_aligned_buffer_page_end(src_argb);
226   return max_diff;
227 }
228 
229 // The following adjustments in dimensions ensure the scale factor will be
230 // exactly achieved.
231 #define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
232 #define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
233 
234 #define TEST_FACTOR1(name, filter, nom, denom, max_diff)                     \
235   TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) {                \
236     int diff = ARGBTestFilter(                                               \
237         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
238         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
239         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,          \
240         benchmark_cpu_info_);                                                \
241     EXPECT_LE(diff, max_diff);                                               \
242   }                                                                          \
243   TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) {            \
244     int diff = ARGBClipTestFilter(                                           \
245         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
246         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
247         kFilter##filter, benchmark_iterations_);                             \
248     EXPECT_LE(diff, max_diff);                                               \
249   }
250 
251 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
252 // filtering is different fixed point implementations for SSSE3, Neon and C.
253 #define TEST_FACTOR(name, nom, denom)         \
254   TEST_FACTOR1(name, None, nom, denom, 0)     \
255   TEST_FACTOR1(name, Linear, nom, denom, 3)   \
256   TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
257   TEST_FACTOR1(name, Box, nom, denom, 3)
258 
259 TEST_FACTOR(2, 1, 2)
260 TEST_FACTOR(4, 1, 4)
261 TEST_FACTOR(8, 1, 8)
262 TEST_FACTOR(3by4, 3, 4)
263 TEST_FACTOR(3by8, 3, 8)
264 TEST_FACTOR(3, 1, 3)
265 #undef TEST_FACTOR1
266 #undef TEST_FACTOR
267 #undef SX
268 #undef DX
269 
270 #define TEST_SCALETO1(name, width, height, filter, max_diff)                   \
271   TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) {             \
272     int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width,      \
273                               height, kFilter##filter, benchmark_iterations_,  \
274                               disable_cpu_flags_, benchmark_cpu_info_);        \
275     EXPECT_LE(diff, max_diff);                                                 \
276   }                                                                            \
277   TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) {           \
278     int diff = ARGBTestFilter(width, height, Abs(benchmark_width_),            \
279                               Abs(benchmark_height_), kFilter##filter,         \
280                               benchmark_iterations_, disable_cpu_flags_,       \
281                               benchmark_cpu_info_);                            \
282     EXPECT_LE(diff, max_diff);                                                 \
283   }                                                                            \
284   TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) {         \
285     int diff =                                                                 \
286         ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
287                            kFilter##filter, benchmark_iterations_);            \
288     EXPECT_LE(diff, max_diff);                                                 \
289   }                                                                            \
290   TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) {       \
291     int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_),        \
292                                   Abs(benchmark_height_), kFilter##filter,     \
293                                   benchmark_iterations_);                      \
294     EXPECT_LE(diff, max_diff);                                                 \
295   }
296 
297 /// Test scale to a specified size with all 4 filters.
298 #define TEST_SCALETO(name, width, height)       \
299   TEST_SCALETO1(name, width, height, None, 0)   \
300   TEST_SCALETO1(name, width, height, Linear, 3) \
301   TEST_SCALETO1(name, width, height, Bilinear, 3)
302 
303 TEST_SCALETO(ARGBScale, 1, 1)
304 TEST_SCALETO(ARGBScale, 320, 240)
305 TEST_SCALETO(ARGBScale, 352, 288)
306 TEST_SCALETO(ARGBScale, 569, 480)
307 TEST_SCALETO(ARGBScale, 640, 360)
308 TEST_SCALETO(ARGBScale, 1280, 720)
309 #undef TEST_SCALETO1
310 #undef TEST_SCALETO
311 
312 // Scale with YUV conversion to ARGB and clipping.
313 LIBYUV_API
YUVToARGBScaleReference2(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint32,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,uint32,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)314 int YUVToARGBScaleReference2(const uint8* src_y,
315                              int src_stride_y,
316                              const uint8* src_u,
317                              int src_stride_u,
318                              const uint8* src_v,
319                              int src_stride_v,
320                              uint32 /* src_fourcc */,  // TODO: Add support.
321                              int src_width,
322                              int src_height,
323                              uint8* dst_argb,
324                              int dst_stride_argb,
325                              uint32 /* dst_fourcc */,  // TODO: Add support.
326                              int dst_width,
327                              int dst_height,
328                              int clip_x,
329                              int clip_y,
330                              int clip_width,
331                              int clip_height,
332                              enum FilterMode filtering) {
333   uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4));
334   int r;
335   I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
336              argb_buffer, src_width * 4, src_width, src_height);
337 
338   r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
339                     dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
340                     clip_width, clip_height, filtering);
341   free(argb_buffer);
342   return r;
343 }
344 
FillRamp(uint8 * buf,int width,int height,int v,int dx,int dy)345 static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) {
346   int rv = v;
347   for (int y = 0; y < height; ++y) {
348     for (int x = 0; x < width; ++x) {
349       *buf++ = v;
350       v += dx;
351       if (v < 0 || v > 255) {
352         dx = -dx;
353         v += dx;
354       }
355     }
356     v = rv + dy;
357     if (v < 0 || v > 255) {
358       dy = -dy;
359       v += dy;
360     }
361     rv = v;
362   }
363 }
364 
365 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
YUVToARGBTestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations)366 static int YUVToARGBTestFilter(int src_width,
367                                int src_height,
368                                int dst_width,
369                                int dst_height,
370                                FilterMode f,
371                                int benchmark_iterations) {
372   int64 src_y_plane_size = Abs(src_width) * Abs(src_height);
373   int64 src_uv_plane_size =
374       ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
375   int src_stride_y = Abs(src_width);
376   int src_stride_uv = (Abs(src_width) + 1) / 2;
377 
378   align_buffer_page_end(src_y, src_y_plane_size);
379   align_buffer_page_end(src_u, src_uv_plane_size);
380   align_buffer_page_end(src_v, src_uv_plane_size);
381 
382   int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
383   int dst_stride_argb = (dst_width)*4;
384   align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
385   align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
386   if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) {
387     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
388     return 0;
389   }
390   // Fill YUV image with continuous ramp, which is less sensitive to
391   // subsampling and filtering differences for test purposes.
392   FillRamp(src_y, Abs(src_width), Abs(src_height), 128, 1, 1);
393   FillRamp(src_u, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 3, 1, 1);
394   FillRamp(src_v, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 4, 1, 1);
395   memset(dst_argb_c, 2, dst_argb_plane_size);
396   memset(dst_argb_opt, 3, dst_argb_plane_size);
397 
398   YUVToARGBScaleReference2(src_y, src_stride_y, src_u, src_stride_uv, src_v,
399                            src_stride_uv, libyuv::FOURCC_I420, src_width,
400                            src_height, dst_argb_c, dst_stride_argb,
401                            libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
402                            dst_width, dst_height, f);
403 
404   for (int i = 0; i < benchmark_iterations; ++i) {
405     YUVToARGBScaleClip(src_y, src_stride_y, src_u, src_stride_uv, src_v,
406                        src_stride_uv, libyuv::FOURCC_I420, src_width,
407                        src_height, dst_argb_opt, dst_stride_argb,
408                        libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
409                        dst_width, dst_height, f);
410   }
411   int max_diff = 0;
412   for (int i = 0; i < dst_height; ++i) {
413     for (int j = 0; j < dst_width * 4; ++j) {
414       int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
415                          dst_argb_opt[(i * dst_stride_argb) + j]);
416       if (abs_diff > max_diff) {
417         printf("error %d at %d,%d c %d opt %d", abs_diff, j, i,
418                dst_argb_c[(i * dst_stride_argb) + j],
419                dst_argb_opt[(i * dst_stride_argb) + j]);
420         EXPECT_LE(abs_diff, 40);
421         max_diff = abs_diff;
422       }
423     }
424   }
425 
426   free_aligned_buffer_page_end(dst_argb_c);
427   free_aligned_buffer_page_end(dst_argb_opt);
428   free_aligned_buffer_page_end(src_y);
429   free_aligned_buffer_page_end(src_u);
430   free_aligned_buffer_page_end(src_v);
431   return max_diff;
432 }
433 
// Converts an I420 image of the benchmark size to ARGB while enlarging it to
// 3/2 of that size in each dimension with the bilinear filter, and expects
// the maximum difference reported by YUVToARGBTestFilter to be at most 10.
TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) {
  const int scaled_width = benchmark_width_ * 3 / 2;
  const int scaled_height = benchmark_height_ * 3 / 2;
  int diff = YUVToARGBTestFilter(benchmark_width_, benchmark_height_,
                                 scaled_width, scaled_height,
                                 libyuv::kFilterBilinear,
                                 benchmark_iterations_);
  EXPECT_LE(diff, 10);
}
441 
// Converts an I420 image of 3/2 the benchmark size to ARGB while reducing it
// to the benchmark size with the bilinear filter, and expects the maximum
// difference reported by YUVToARGBTestFilter to be at most 10.
TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
  const int source_width = benchmark_width_ * 3 / 2;
  const int source_height = benchmark_height_ * 3 / 2;
  int diff = YUVToARGBTestFilter(source_width, source_height,
                                 benchmark_width_, benchmark_height_,
                                 libyuv::kFilterBilinear,
                                 benchmark_iterations_);
  EXPECT_LE(diff, 10);
}
448 
449 }  // namespace libyuv
450