1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  *
10  *  This code was originally written by: Gregory Maxwell, at the Daala
11  *  project.
12  */
13 #include <assert.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <math.h>
17 
18 #include "./vpx_config.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "vpx_dsp/ssim.h"
21 #include "vpx_ports/system_state.h"
22 #include "vpx_dsp/psnr.h"
23 
24 #if !defined(M_PI)
25 #define M_PI (3.141592653589793238462643)
26 #endif
27 #include <string.h>
28 
od_bin_fdct8x8(tran_low_t * y,int ystride,const int16_t * x,int xstride)29 static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
30                            int xstride) {
31   int i, j;
32   (void)xstride;
33   vpx_fdct8x8(x, y, ystride);
34   for (i = 0; i < 8; i++)
35     for (j = 0; j < 8; j++)
36       *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
37 }
38 #if CONFIG_VP9_HIGHBITDEPTH
hbd_od_bin_fdct8x8(tran_low_t * y,int ystride,const int16_t * x,int xstride)39 static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
40                                int xstride) {
41   int i, j;
42   (void)xstride;
43   vpx_highbd_fdct8x8(x, y, ystride);
44   for (i = 0; i < 8; i++)
45     for (j = 0; j < 8; j++)
46       *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
47 }
48 #endif
49 
50 /* Normalized inverse quantization matrix for 8x8 DCT at the point of
51  * transparency. This is not the JPEG based matrix from the paper,
52  this one gives a slightly higher MOS agreement.*/
53 static const double csf_y[8][8] = {
54   { 1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,
55     0.678296995242, 0.466224900598, 0.3265091542 },
56   { 2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,
57     0.868920337363, 0.61280991668, 0.436405793551 },
58   { 2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257,
59     0.670882927016, 0.501731932449, 0.372504254596 },
60   { 1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575, 0.605636379554,
61     0.48309405692, 0.380429446972, 0.295774038565 },
62   { 1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554, 0.448996256676,
63     0.352889268808, 0.283006984131, 0.226951348204 },
64   { 0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692,
65     0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321 },
66   { 0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972,
67     0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001 },
68   { 0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,
69     0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276 }
70 };
71 static const double csf_cb420[8][8] = {
72   { 1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,
73     0.898018824055, 0.74725392039, 0.615105596242 },
74   { 2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
75     1.17428548929, 0.996404342439, 0.830890433625 },
76   { 1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,
77     0.960060382087, 0.849823426169, 0.731221236837 },
78   { 1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099,
79     0.751437590932, 0.685398513368, 0.608694761374 },
80   { 1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187,
81     0.605503172737, 0.55002013668, 0.495804539034 },
82   { 0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932,
83     0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965 },
84   { 0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368,
85     0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733 },
86   { 0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,
87     0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237 }
88 };
89 static const double csf_cr420[8][8] = {
90   { 2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,
91     0.867069376285, 0.721500455585, 0.593906509971 },
92   { 2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
93     1.13381474809, 0.962064122248, 0.802254508198 },
94   { 1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848,
95     0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706 },
96   { 1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,
97     0.725539939514, 0.661776842059, 0.587716619023 },
98   { 1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286,
99     0.584635025748, 0.531064164893, 0.478717061273 },
100   { 0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514,
101     0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543 },
102   { 0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059,
103     0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063 },
104   { 0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,
105     0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658 }
106 };
107 
convert_score_db(double _score,double _weight,int bit_depth)108 static double convert_score_db(double _score, double _weight, int bit_depth) {
109   int16_t pix_max = 255;
110   assert(_score * _weight >= 0.0);
111   if (bit_depth == 10)
112     pix_max = 1023;
113   else if (bit_depth == 12)
114     pix_max = 4095;
115 
116   if (_weight * _score < pix_max * pix_max * 1e-10) return MAX_PSNR;
117   return 10 * (log10(pix_max * pix_max) - log10(_weight * _score));
118 }
119 
calc_psnrhvs(const unsigned char * src,int _systride,const unsigned char * dst,int _dystride,double _par,int _w,int _h,int _step,const double _csf[8][8],uint32_t bit_depth,uint32_t _shift)120 static double calc_psnrhvs(const unsigned char *src, int _systride,
121                            const unsigned char *dst, int _dystride, double _par,
122                            int _w, int _h, int _step, const double _csf[8][8],
123                            uint32_t bit_depth, uint32_t _shift) {
124   double ret;
125   const uint8_t *_src8 = src;
126   const uint8_t *_dst8 = dst;
127   const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);
128   const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);
129   DECLARE_ALIGNED(16, int16_t, dct_s[8 * 8]);
130   DECLARE_ALIGNED(16, int16_t, dct_d[8 * 8]);
131   DECLARE_ALIGNED(16, tran_low_t, dct_s_coef[8 * 8]);
132   DECLARE_ALIGNED(16, tran_low_t, dct_d_coef[8 * 8]);
133   double mask[8][8];
134   int pixels;
135   int x;
136   int y;
137   (void)_par;
138   ret = pixels = 0;
139 
140   /*In the PSNR-HVS-M paper[1] the authors describe the construction of
141    their masking table as "we have used the quantization table for the
142    color component Y of JPEG [6] that has been also obtained on the
143    basis of CSF. Note that the values in quantization table JPEG have
144    been normalized and then squared." Their CSF matrix (from PSNR-HVS)
145    was also constructed from the JPEG matrices. I can not find any obvious
146    scheme of normalizing to produce their table, but if I multiply their
147    CSF by 0.38857 and square the result I get their masking table.
148    I have no idea where this constant comes from, but deviating from it
149    too greatly hurts MOS agreement.
150 
151    [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli,
152    Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking
153    of DCT basis functions", CD-ROM Proceedings of the Third
154    International Workshop on Video Processing and Quality Metrics for Consumer
155    Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.*/
156   for (x = 0; x < 8; x++)
157     for (y = 0; y < 8; y++)
158       mask[x][y] =
159           (_csf[x][y] * 0.3885746225901003) * (_csf[x][y] * 0.3885746225901003);
160   for (y = 0; y < _h - 7; y += _step) {
161     for (x = 0; x < _w - 7; x += _step) {
162       int i;
163       int j;
164       double s_means[4];
165       double d_means[4];
166       double s_vars[4];
167       double d_vars[4];
168       double s_gmean = 0;
169       double d_gmean = 0;
170       double s_gvar = 0;
171       double d_gvar = 0;
172       double s_mask = 0;
173       double d_mask = 0;
174       for (i = 0; i < 4; i++)
175         s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0;
176       for (i = 0; i < 8; i++) {
177         for (j = 0; j < 8; j++) {
178           int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
179           if (bit_depth == 8 && _shift == 0) {
180             dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)];
181             dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)];
182           } else if (bit_depth == 10 || bit_depth == 12) {
183             dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift;
184             dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift;
185           }
186           s_gmean += dct_s[i * 8 + j];
187           d_gmean += dct_d[i * 8 + j];
188           s_means[sub] += dct_s[i * 8 + j];
189           d_means[sub] += dct_d[i * 8 + j];
190         }
191       }
192       s_gmean /= 64.f;
193       d_gmean /= 64.f;
194       for (i = 0; i < 4; i++) s_means[i] /= 16.f;
195       for (i = 0; i < 4; i++) d_means[i] /= 16.f;
196       for (i = 0; i < 8; i++) {
197         for (j = 0; j < 8; j++) {
198           int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
199           s_gvar += (dct_s[i * 8 + j] - s_gmean) * (dct_s[i * 8 + j] - s_gmean);
200           d_gvar += (dct_d[i * 8 + j] - d_gmean) * (dct_d[i * 8 + j] - d_gmean);
201           s_vars[sub] += (dct_s[i * 8 + j] - s_means[sub]) *
202                          (dct_s[i * 8 + j] - s_means[sub]);
203           d_vars[sub] += (dct_d[i * 8 + j] - d_means[sub]) *
204                          (dct_d[i * 8 + j] - d_means[sub]);
205         }
206       }
207       s_gvar *= 1 / 63.f * 64;
208       d_gvar *= 1 / 63.f * 64;
209       for (i = 0; i < 4; i++) s_vars[i] *= 1 / 15.f * 16;
210       for (i = 0; i < 4; i++) d_vars[i] *= 1 / 15.f * 16;
211       if (s_gvar > 0)
212         s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;
213       if (d_gvar > 0)
214         d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;
215 #if CONFIG_VP9_HIGHBITDEPTH
216       if (bit_depth == 10 || bit_depth == 12) {
217         hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
218         hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
219       }
220 #endif
221       if (bit_depth == 8) {
222         od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
223         od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
224       }
225       for (i = 0; i < 8; i++)
226         for (j = (i == 0); j < 8; j++)
227           s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
228       for (i = 0; i < 8; i++)
229         for (j = (i == 0); j < 8; j++)
230           d_mask += dct_d_coef[i * 8 + j] * dct_d_coef[i * 8 + j] * mask[i][j];
231       s_mask = sqrt(s_mask * s_gvar) / 32.f;
232       d_mask = sqrt(d_mask * d_gvar) / 32.f;
233       if (d_mask > s_mask) s_mask = d_mask;
234       for (i = 0; i < 8; i++) {
235         for (j = 0; j < 8; j++) {
236           double err;
237           err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
238           if (i != 0 || j != 0)
239             err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
240           ret += (err * _csf[i][j]) * (err * _csf[i][j]);
241           pixels++;
242         }
243       }
244     }
245   }
246   if (pixels <= 0) return 0;
247   ret /= pixels;
248   return ret;
249 }
250 
vpx_psnrhvs(const YV12_BUFFER_CONFIG * src,const YV12_BUFFER_CONFIG * dest,double * y_psnrhvs,double * u_psnrhvs,double * v_psnrhvs,uint32_t bd,uint32_t in_bd)251 double vpx_psnrhvs(const YV12_BUFFER_CONFIG *src,
252                    const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs,
253                    double *u_psnrhvs, double *v_psnrhvs, uint32_t bd,
254                    uint32_t in_bd) {
255   double psnrhvs;
256   const double par = 1.0;
257   const int step = 7;
258   uint32_t bd_shift = 0;
259   vpx_clear_system_state();
260 
261   assert(bd == 8 || bd == 10 || bd == 12);
262   assert(bd >= in_bd);
263 
264   bd_shift = bd - in_bd;
265 
266   *y_psnrhvs = calc_psnrhvs(src->y_buffer, src->y_stride, dest->y_buffer,
267                             dest->y_stride, par, src->y_crop_width,
268                             src->y_crop_height, step, csf_y, bd, bd_shift);
269   *u_psnrhvs = calc_psnrhvs(src->u_buffer, src->uv_stride, dest->u_buffer,
270                             dest->uv_stride, par, src->uv_crop_width,
271                             src->uv_crop_height, step, csf_cb420, bd, bd_shift);
272   *v_psnrhvs = calc_psnrhvs(src->v_buffer, src->uv_stride, dest->v_buffer,
273                             dest->uv_stride, par, src->uv_crop_width,
274                             src->uv_crop_height, step, csf_cr420, bd, bd_shift);
275   psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));
276   return convert_score_db(psnrhvs, 1.0, in_bd);
277 }
278