1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 
13 #include "./vp9_rtcd.h"
14 #include "./vpx_dsp_rtcd.h"
15 #include "vp9/common/vp9_blockd.h"
16 #include "vp9/common/vp9_idct.h"
17 #include "vpx_dsp/inv_txfm.h"
18 #include "vpx_ports/mem.h"
19 
vp9_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21                          int tx_type) {
22   const transform_2d IHT_4[] = {
23     { idct4_c, idct4_c  },  // DCT_DCT  = 0
24     { iadst4_c, idct4_c  },   // ADST_DCT = 1
25     { idct4_c, iadst4_c },    // DCT_ADST = 2
26     { iadst4_c, iadst4_c }      // ADST_ADST = 3
27   };
28 
29   int i, j;
30   tran_low_t out[4 * 4];
31   tran_low_t *outptr = out;
32   tran_low_t temp_in[4], temp_out[4];
33 
34   // inverse transform row vectors
35   for (i = 0; i < 4; ++i) {
36     IHT_4[tx_type].rows(input, outptr);
37     input  += 4;
38     outptr += 4;
39   }
40 
41   // inverse transform column vectors
42   for (i = 0; i < 4; ++i) {
43     for (j = 0; j < 4; ++j)
44       temp_in[j] = out[j * 4 + i];
45     IHT_4[tx_type].cols(temp_in, temp_out);
46     for (j = 0; j < 4; ++j) {
47       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
48                                             ROUND_POWER_OF_TWO(temp_out[j], 4));
49     }
50   }
51 }
52 
// 1-D inverse transform pairs for the 8x8 hybrid transform, indexed by
// tx_type (the value of each index is noted per entry).
// vp9_iht8x8_64_add_c() copies one entry and calls its `rows` and `cols`
// members.
// NOTE(review): which initializer lands in `rows` and which in `cols` depends
// on the member order of transform_2d, declared outside this file - confirm
// before reordering entries.
static const transform_2d IHT_8[] = {
  { idct8_c,  idct8_c  },  // DCT_DCT  = 0
  { iadst8_c, idct8_c  },  // ADST_DCT = 1
  { idct8_c,  iadst8_c },  // DCT_ADST = 2
  { iadst8_c, iadst8_c }   // ADST_ADST = 3
};
59 
vp9_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)60 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
61                          int tx_type) {
62   int i, j;
63   tran_low_t out[8 * 8];
64   tran_low_t *outptr = out;
65   tran_low_t temp_in[8], temp_out[8];
66   const transform_2d ht = IHT_8[tx_type];
67 
68   // inverse transform row vectors
69   for (i = 0; i < 8; ++i) {
70     ht.rows(input, outptr);
71     input += 8;
72     outptr += 8;
73   }
74 
75   // inverse transform column vectors
76   for (i = 0; i < 8; ++i) {
77     for (j = 0; j < 8; ++j)
78       temp_in[j] = out[j * 8 + i];
79     ht.cols(temp_in, temp_out);
80     for (j = 0; j < 8; ++j) {
81       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
82                                             ROUND_POWER_OF_TWO(temp_out[j], 5));
83     }
84   }
85 }
86 
// 1-D inverse transform pairs for the 16x16 hybrid transform, indexed by
// tx_type (the value of each index is noted per entry).
// vp9_iht16x16_256_add_c() copies one entry and calls its `rows` and `cols`
// members.
// NOTE(review): which initializer lands in `rows` and which in `cols` depends
// on the member order of transform_2d, declared outside this file - confirm
// before reordering entries.
static const transform_2d IHT_16[] = {
  { idct16_c,  idct16_c  },  // DCT_DCT  = 0
  { iadst16_c, idct16_c  },  // ADST_DCT = 1
  { idct16_c,  iadst16_c },  // DCT_ADST = 2
  { iadst16_c, iadst16_c }   // ADST_ADST = 3
};
93 
vp9_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)94 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
95                             int tx_type) {
96   int i, j;
97   tran_low_t out[16 * 16];
98   tran_low_t *outptr = out;
99   tran_low_t temp_in[16], temp_out[16];
100   const transform_2d ht = IHT_16[tx_type];
101 
102   // Rows
103   for (i = 0; i < 16; ++i) {
104     ht.rows(input, outptr);
105     input += 16;
106     outptr += 16;
107   }
108 
109   // Columns
110   for (i = 0; i < 16; ++i) {
111     for (j = 0; j < 16; ++j)
112       temp_in[j] = out[j * 16 + i];
113     ht.cols(temp_in, temp_out);
114     for (j = 0; j < 16; ++j) {
115       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
116                                             ROUND_POWER_OF_TWO(temp_out[j], 6));
117     }
118   }
119 }
120 
121 // idct
// Picks the 4x4 inverse DCT routine from eob: the full 16-coefficient
// version when eob > 1, otherwise the single-coefficient (_1) version.
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_idct4x4_1_add(input, dest, stride);
    return;
  }
  vpx_idct4x4_16_add(input, dest, stride);
}
129 
130 
// Picks the 4x4 inverse WHT routine from eob: the full 16-coefficient
// version when eob > 1, otherwise the single-coefficient (_1) version.
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_iwht4x4_1_add(input, dest, stride);
    return;
  }
  vpx_iwht4x4_16_add(input, dest, stride);
}
138 
// Picks the 8x8 inverse DCT routine from eob.
//
// If dc is 1, then input[0] is the reconstructed value and needs no
// dequantization. Also, when dc is 1, dc is counted in eobs, i.e. eobs >= 1.
//
// The calculation can be simplified when there are few non-zero DCT
// coefficients, so eob selects a reduced routine where possible.
// TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
// Combine that with code here.
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_idct8x8_1_add(input, dest, stride);
    return;
  }
  if (eob <= 12) {
    vpx_idct8x8_12_add(input, dest, stride);
    return;
  }
  vpx_idct8x8_64_add(input, dest, stride);
}
156 
// Picks the 16x16 inverse DCT routine from eob. The calculation can be
// simplified when there are few non-zero DCT coefficients, so eob separates
// the cases.
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_idct16x16_1_add(input, dest, stride);
    return;
  }
  if (eob <= 10) {
    vpx_idct16x16_10_add(input, dest, stride);
    return;
  }
  vpx_idct16x16_256_add(input, dest, stride);
}
169 
// Picks the 32x32 inverse DCT routine from eob.
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    vpx_idct32x32_1_add(input, dest, stride);
    return;
  }
  if (eob <= 34) {
    // Non-zero coeff only in upper-left 8x8.
    vpx_idct32x32_34_add(input, dest, stride);
    return;
  }
  vpx_idct32x32_1024_add(input, dest, stride);
}
180 
181 // iht
// 4x4 inverse transform entry point. DCT_DCT goes through the eob-driven
// vp9_idct4x4_add(); every other tx_type uses the hybrid transform, which
// does not take eob.
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
189 
// 8x8 inverse transform entry point. DCT_DCT goes through the eob-driven
// vp9_idct8x8_add(); every other tx_type uses the hybrid transform, which
// does not take eob.
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht8x8_64_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct8x8_add(input, dest, stride, eob);
}
198 
// 16x16 inverse transform entry point. DCT_DCT goes through the eob-driven
// vp9_idct16x16_add(); every other tx_type uses the hybrid transform, which
// does not take eob.
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht16x16_256_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct16x16_add(input, dest, stride, eob);
}
207 
208 #if CONFIG_VP9_HIGHBITDEPTH
vp9_highbd_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest8,int stride,int tx_type,int bd)209 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
210                                 int stride, int tx_type, int bd) {
211   const highbd_transform_2d IHT_4[] = {
212     { vpx_highbd_idct4_c, vpx_highbd_idct4_c  },    // DCT_DCT  = 0
213     { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },    // ADST_DCT = 1
214     { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },    // DCT_ADST = 2
215     { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }    // ADST_ADST = 3
216   };
217   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
218 
219   int i, j;
220   tran_low_t out[4 * 4];
221   tran_low_t *outptr = out;
222   tran_low_t temp_in[4], temp_out[4];
223 
224   // Inverse transform row vectors.
225   for (i = 0; i < 4; ++i) {
226     IHT_4[tx_type].rows(input, outptr, bd);
227     input  += 4;
228     outptr += 4;
229   }
230 
231   // Inverse transform column vectors.
232   for (i = 0; i < 4; ++i) {
233     for (j = 0; j < 4; ++j)
234       temp_in[j] = out[j * 4 + i];
235     IHT_4[tx_type].cols(temp_in, temp_out, bd);
236     for (j = 0; j < 4; ++j) {
237       dest[j * stride + i] = highbd_clip_pixel_add(
238           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
239     }
240   }
241 }
242 
// High-bitdepth 1-D inverse transform pairs for the 8x8 hybrid transform,
// indexed by tx_type (the value of each index is noted per entry).
// vp9_highbd_iht8x8_64_add_c() copies one entry and calls its `rows` and
// `cols` members.
// NOTE(review): which initializer lands in `rows` and which in `cols` depends
// on the member order of highbd_transform_2d, declared outside this file -
// confirm before reordering entries.
static const highbd_transform_2d HIGH_IHT_8[] = {
  { vpx_highbd_idct8_c,  vpx_highbd_idct8_c  },  // DCT_DCT  = 0
  { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },  // ADST_DCT = 1
  { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },  // DCT_ADST = 2
  { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }   // ADST_ADST = 3
};
249 
vp9_highbd_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest8,int stride,int tx_type,int bd)250 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
251                                 int stride, int tx_type, int bd) {
252   int i, j;
253   tran_low_t out[8 * 8];
254   tran_low_t *outptr = out;
255   tran_low_t temp_in[8], temp_out[8];
256   const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
257   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
258 
259   // Inverse transform row vectors.
260   for (i = 0; i < 8; ++i) {
261     ht.rows(input, outptr, bd);
262     input += 8;
263     outptr += 8;
264   }
265 
266   // Inverse transform column vectors.
267   for (i = 0; i < 8; ++i) {
268     for (j = 0; j < 8; ++j)
269       temp_in[j] = out[j * 8 + i];
270     ht.cols(temp_in, temp_out, bd);
271     for (j = 0; j < 8; ++j) {
272       dest[j * stride + i] = highbd_clip_pixel_add(
273           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
274     }
275   }
276 }
277 
// High-bitdepth 1-D inverse transform pairs for the 16x16 hybrid transform,
// indexed by tx_type (the value of each index is noted per entry).
// vp9_highbd_iht16x16_256_add_c() copies one entry and calls its `rows` and
// `cols` members.
// NOTE(review): which initializer lands in `rows` and which in `cols` depends
// on the member order of highbd_transform_2d, declared outside this file -
// confirm before reordering entries.
static const highbd_transform_2d HIGH_IHT_16[] = {
  { vpx_highbd_idct16_c,  vpx_highbd_idct16_c  },  // DCT_DCT  = 0
  { vpx_highbd_iadst16_c, vpx_highbd_idct16_c  },  // ADST_DCT = 1
  { vpx_highbd_idct16_c,  vpx_highbd_iadst16_c },  // DCT_ADST = 2
  { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }   // ADST_ADST = 3
};
284 
vp9_highbd_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest8,int stride,int tx_type,int bd)285 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
286                                    int stride, int tx_type, int bd) {
287   int i, j;
288   tran_low_t out[16 * 16];
289   tran_low_t *outptr = out;
290   tran_low_t temp_in[16], temp_out[16];
291   const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
292   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
293 
294   // Rows
295   for (i = 0; i < 16; ++i) {
296     ht.rows(input, outptr, bd);
297     input += 16;
298     outptr += 16;
299   }
300 
301   // Columns
302   for (i = 0; i < 16; ++i) {
303     for (j = 0; j < 16; ++j)
304       temp_in[j] = out[j * 16 + i];
305     ht.cols(temp_in, temp_out, bd);
306     for (j = 0; j < 16; ++j) {
307       dest[j * stride + i] = highbd_clip_pixel_add(
308           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
309     }
310   }
311 }
312 
313 // idct
// Picks the high-bitdepth 4x4 inverse DCT routine from eob: the full
// 16-coefficient version when eob > 1, otherwise the single-coefficient (_1)
// version. bd is passed through unchanged.
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
}
321 
322 
// Picks the high-bitdepth 4x4 inverse WHT routine from eob: the full
// 16-coefficient version when eob > 1, otherwise the single-coefficient (_1)
// version. bd is passed through unchanged.
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
}
330 
// Picks the high-bitdepth 8x8 inverse DCT routine from eob; bd is passed
// through unchanged.
//
// If dc is 1, then input[0] is the reconstructed value and needs no
// dequantization. Also, when dc is 1, dc is counted in eobs, i.e. eobs >= 1.
//
// The calculation can be simplified when there are few non-zero DCT
// coefficients, so eob selects a reduced routine where possible.
// TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
// Combine that with code here.
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 10) {
    vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
}
349 
// Picks the high-bitdepth 16x16 inverse DCT routine from eob; bd is passed
// through unchanged. The calculation can be simplified when there are few
// non-zero DCT coefficients, so eob separates the cases.
void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 10) {
    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
}
363 
// Picks the high-bitdepth 32x32 inverse DCT routine from eob; bd is passed
// through unchanged.
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 34) {
    // Non-zero coeff only in upper-left 8x8.
    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
}
375 
376 // iht
// High-bitdepth 4x4 inverse transform entry point. DCT_DCT goes through the
// eob-driven vp9_highbd_idct4x4_add(); every other tx_type uses the hybrid
// transform, which does not take eob.
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint8_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
}
384 
// High-bitdepth 8x8 inverse transform entry point. DCT_DCT goes through the
// eob-driven vp9_highbd_idct8x8_add(); every other tx_type uses the hybrid
// transform, which does not take eob.
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint8_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
}
393 
// High-bitdepth 16x16 inverse transform entry point. DCT_DCT goes through
// the eob-driven vp9_highbd_idct16x16_add(); every other tx_type uses the
// hybrid transform, which does not take eob.
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                             uint8_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
}
402 #endif  // CONFIG_VP9_HIGHBITDEPTH
403