1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_dsp_rtcd.h"
15 #include "config/av1_rtcd.h"
16 
17 #include "aom_ports/mem.h"
18 #include "av1/common/av1_inv_txfm1d_cfg.h"
19 #include "av1/common/av1_txfm.h"
20 #include "av1/common/blockd.h"
21 #include "av1/common/enums.h"
22 #include "av1/common/idct.h"
23 
av1_get_tx_scale(const TX_SIZE tx_size)24 int av1_get_tx_scale(const TX_SIZE tx_size) {
25   const int pels = tx_size_2d[tx_size];
26   // Largest possible pels is 4096 (64x64).
27   return (pels > 256) + (pels > 1024);
28 }
29 
// NOTE: The implementations of all inverses need to be aware of the fact
// that input and output could be the same buffer.
32 
33 // idct
// Picks the 4x4 inverse WHT variant based on eob: when eob is at most 1 the
// "_1" routine is called, otherwise the "_16" routine is called. The
// remaining arguments are forwarded unchanged.
void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    av1_highbd_iwht4x4_1_add(input, dest, stride, bd);
    return;
  }
  av1_highbd_iwht4x4_16_add(input, dest, stride, bd);
}
41 
av1_highbd_inv_txfm_add_4x4_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)42 void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *input, uint8_t *dest,
43                                    int stride, const TxfmParam *txfm_param) {
44   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
45   int eob = txfm_param->eob;
46   int bd = txfm_param->bd;
47   int lossless = txfm_param->lossless;
48   const int32_t *src = cast_to_int32(input);
49   const TX_TYPE tx_type = txfm_param->tx_type;
50   if (lossless) {
51     assert(tx_type == DCT_DCT);
52     av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
53     return;
54   }
55 
56   av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd);
57 }
58 
av1_highbd_inv_txfm_add_4x8_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)59 void av1_highbd_inv_txfm_add_4x8_c(const tran_low_t *input, uint8_t *dest,
60                                    int stride, const TxfmParam *txfm_param) {
61   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
62   const int32_t *src = cast_to_int32(input);
63   av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
64                            txfm_param->tx_type, txfm_param->bd);
65 }
66 
av1_highbd_inv_txfm_add_8x4_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)67 void av1_highbd_inv_txfm_add_8x4_c(const tran_low_t *input, uint8_t *dest,
68                                    int stride, const TxfmParam *txfm_param) {
69   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
70   const int32_t *src = cast_to_int32(input);
71   av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
72                            txfm_param->tx_type, txfm_param->bd);
73 }
74 
av1_highbd_inv_txfm_add_16x32_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)75 void av1_highbd_inv_txfm_add_16x32_c(const tran_low_t *input, uint8_t *dest,
76                                      int stride, const TxfmParam *txfm_param) {
77   const int32_t *src = cast_to_int32(input);
78   av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
79                              txfm_param->tx_type, txfm_param->bd);
80 }
81 
av1_highbd_inv_txfm_add_32x16_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)82 void av1_highbd_inv_txfm_add_32x16_c(const tran_low_t *input, uint8_t *dest,
83                                      int stride, const TxfmParam *txfm_param) {
84   const int32_t *src = cast_to_int32(input);
85   av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
86                              txfm_param->tx_type, txfm_param->bd);
87 }
88 
av1_highbd_inv_txfm_add_16x4_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)89 void av1_highbd_inv_txfm_add_16x4_c(const tran_low_t *input, uint8_t *dest,
90                                     int stride, const TxfmParam *txfm_param) {
91   const int32_t *src = cast_to_int32(input);
92   av1_inv_txfm2d_add_16x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
93                             txfm_param->tx_type, txfm_param->bd);
94 }
95 
av1_highbd_inv_txfm_add_4x16_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)96 void av1_highbd_inv_txfm_add_4x16_c(const tran_low_t *input, uint8_t *dest,
97                                     int stride, const TxfmParam *txfm_param) {
98   const int32_t *src = cast_to_int32(input);
99   av1_inv_txfm2d_add_4x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
100                             txfm_param->tx_type, txfm_param->bd);
101 }
102 
av1_highbd_inv_txfm_add_32x8_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)103 void av1_highbd_inv_txfm_add_32x8_c(const tran_low_t *input, uint8_t *dest,
104                                     int stride, const TxfmParam *txfm_param) {
105   const int32_t *src = cast_to_int32(input);
106   av1_inv_txfm2d_add_32x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
107                             txfm_param->tx_type, txfm_param->bd);
108 }
109 
av1_highbd_inv_txfm_add_8x32_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)110 void av1_highbd_inv_txfm_add_8x32_c(const tran_low_t *input, uint8_t *dest,
111                                     int stride, const TxfmParam *txfm_param) {
112   const int32_t *src = cast_to_int32(input);
113   av1_inv_txfm2d_add_8x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
114                             txfm_param->tx_type, txfm_param->bd);
115 }
116 
av1_highbd_inv_txfm_add_32x64_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)117 void av1_highbd_inv_txfm_add_32x64_c(const tran_low_t *input, uint8_t *dest,
118                                      int stride, const TxfmParam *txfm_param) {
119   const int32_t *src = cast_to_int32(input);
120   av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
121                              txfm_param->tx_type, txfm_param->bd);
122 }
123 
av1_highbd_inv_txfm_add_64x32_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)124 void av1_highbd_inv_txfm_add_64x32_c(const tran_low_t *input, uint8_t *dest,
125                                      int stride, const TxfmParam *txfm_param) {
126   const int32_t *src = cast_to_int32(input);
127   av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
128                              txfm_param->tx_type, txfm_param->bd);
129 }
130 
av1_highbd_inv_txfm_add_16x64_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)131 void av1_highbd_inv_txfm_add_16x64_c(const tran_low_t *input, uint8_t *dest,
132                                      int stride, const TxfmParam *txfm_param) {
133   const int32_t *src = cast_to_int32(input);
134   av1_inv_txfm2d_add_16x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
135                              txfm_param->tx_type, txfm_param->bd);
136 }
137 
av1_highbd_inv_txfm_add_64x16_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)138 void av1_highbd_inv_txfm_add_64x16_c(const tran_low_t *input, uint8_t *dest,
139                                      int stride, const TxfmParam *txfm_param) {
140   const int32_t *src = cast_to_int32(input);
141   av1_inv_txfm2d_add_64x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
142                              txfm_param->tx_type, txfm_param->bd);
143 }
144 
av1_highbd_inv_txfm_add_8x8_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)145 void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *input, uint8_t *dest,
146                                    int stride, const TxfmParam *txfm_param) {
147   int bd = txfm_param->bd;
148   const TX_TYPE tx_type = txfm_param->tx_type;
149   const int32_t *src = cast_to_int32(input);
150 
151   av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd);
152 }
153 
av1_highbd_inv_txfm_add_16x16_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)154 void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *input, uint8_t *dest,
155                                      int stride, const TxfmParam *txfm_param) {
156   int bd = txfm_param->bd;
157   const TX_TYPE tx_type = txfm_param->tx_type;
158   const int32_t *src = cast_to_int32(input);
159 
160   av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
161                              bd);
162 }
163 
av1_highbd_inv_txfm_add_8x16_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)164 void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *input, uint8_t *dest,
165                                     int stride, const TxfmParam *txfm_param) {
166   const int32_t *src = cast_to_int32(input);
167   av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
168                             txfm_param->tx_type, txfm_param->bd);
169 }
170 
av1_highbd_inv_txfm_add_16x8_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)171 void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *input, uint8_t *dest,
172                                     int stride, const TxfmParam *txfm_param) {
173   const int32_t *src = cast_to_int32(input);
174   av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
175                             txfm_param->tx_type, txfm_param->bd);
176 }
177 
av1_highbd_inv_txfm_add_32x32_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)178 void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *input, uint8_t *dest,
179                                      int stride, const TxfmParam *txfm_param) {
180   const int bd = txfm_param->bd;
181   const TX_TYPE tx_type = txfm_param->tx_type;
182   const int32_t *src = cast_to_int32(input);
183 
184   av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
185                              bd);
186 }
187 
av1_highbd_inv_txfm_add_64x64_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)188 void av1_highbd_inv_txfm_add_64x64_c(const tran_low_t *input, uint8_t *dest,
189                                      int stride, const TxfmParam *txfm_param) {
190   const int bd = txfm_param->bd;
191   const TX_TYPE tx_type = txfm_param->tx_type;
192   const int32_t *src = cast_to_int32(input);
193   assert(tx_type == DCT_DCT);
194   av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
195                              bd);
196 }
197 
// Fills *txfm_param for one transform block. tx_type, tx_size and eob are
// copied from the arguments; lossless, bd and is_hbd are read from xd; and
// tx_set_type is looked up with av1_get_ext_tx_set_type from the transform
// size, whether xd->mi[0] is an inter block, and reduced_tx_set.
// The plane argument is unused.
static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
                            TX_TYPE tx_type, int eob, int reduced_tx_set,
                            TxfmParam *txfm_param) {
  (void)plane;

  // Values taken directly from the arguments.
  txfm_param->tx_size = tx_size;
  txfm_param->tx_type = tx_type;
  txfm_param->eob = eob;

  // Values derived from the macroblock descriptor.
  txfm_param->bd = xd->bd;
  txfm_param->is_hbd = is_cur_buf_hbd(xd);
  txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id];
  txfm_param->tx_set_type = av1_get_ext_tx_set_type(
      tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
}
211 
av1_highbd_inv_txfm_add_c(const tran_low_t * input,uint8_t * dest,int stride,const TxfmParam * txfm_param)212 void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint8_t *dest,
213                                int stride, const TxfmParam *txfm_param) {
214   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
215   const TX_SIZE tx_size = txfm_param->tx_size;
216   switch (tx_size) {
217     case TX_32X32:
218       av1_highbd_inv_txfm_add_32x32_c(input, dest, stride, txfm_param);
219       break;
220     case TX_16X16:
221       av1_highbd_inv_txfm_add_16x16_c(input, dest, stride, txfm_param);
222       break;
223     case TX_8X8:
224       av1_highbd_inv_txfm_add_8x8_c(input, dest, stride, txfm_param);
225       break;
226     case TX_4X8:
227       av1_highbd_inv_txfm_add_4x8_c(input, dest, stride, txfm_param);
228       break;
229     case TX_8X4:
230       av1_highbd_inv_txfm_add_8x4_c(input, dest, stride, txfm_param);
231       break;
232     case TX_8X16:
233       av1_highbd_inv_txfm_add_8x16_c(input, dest, stride, txfm_param);
234       break;
235     case TX_16X8:
236       av1_highbd_inv_txfm_add_16x8_c(input, dest, stride, txfm_param);
237       break;
238     case TX_16X32:
239       av1_highbd_inv_txfm_add_16x32_c(input, dest, stride, txfm_param);
240       break;
241     case TX_32X16:
242       av1_highbd_inv_txfm_add_32x16_c(input, dest, stride, txfm_param);
243       break;
244     case TX_64X64:
245       av1_highbd_inv_txfm_add_64x64_c(input, dest, stride, txfm_param);
246       break;
247     case TX_32X64:
248       av1_highbd_inv_txfm_add_32x64_c(input, dest, stride, txfm_param);
249       break;
250     case TX_64X32:
251       av1_highbd_inv_txfm_add_64x32_c(input, dest, stride, txfm_param);
252       break;
253     case TX_16X64:
254       av1_highbd_inv_txfm_add_16x64_c(input, dest, stride, txfm_param);
255       break;
256     case TX_64X16:
257       av1_highbd_inv_txfm_add_64x16_c(input, dest, stride, txfm_param);
258       break;
259     case TX_4X4:
260       // this is like av1_short_idct4x4 but has a special case around eob<=1
261       // which is significant (not just an optimization) for the lossless
262       // case.
263       av1_highbd_inv_txfm_add_4x4_c(input, dest, stride, txfm_param);
264       break;
265     case TX_16X4:
266       av1_highbd_inv_txfm_add_16x4_c(input, dest, stride, txfm_param);
267       break;
268     case TX_4X16:
269       av1_highbd_inv_txfm_add_4x16_c(input, dest, stride, txfm_param);
270       break;
271     case TX_8X32:
272       av1_highbd_inv_txfm_add_8x32_c(input, dest, stride, txfm_param);
273       break;
274     case TX_32X8:
275       av1_highbd_inv_txfm_add_32x8_c(input, dest, stride, txfm_param);
276       break;
277     default: assert(0 && "Invalid transform size"); break;
278   }
279 }
280 
av1_inv_txfm_add_c(const tran_low_t * dqcoeff,uint8_t * dst,int stride,const TxfmParam * txfm_param)281 void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
282                         const TxfmParam *txfm_param) {
283   const TX_SIZE tx_size = txfm_param->tx_size;
284   DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]);
285   int tmp_stride = MAX_TX_SIZE;
286   int w = tx_size_wide[tx_size];
287   int h = tx_size_high[tx_size];
288   for (int r = 0; r < h; ++r) {
289     for (int c = 0; c < w; ++c) {
290       tmp[r * tmp_stride + c] = dst[r * stride + c];
291     }
292   }
293 
294   av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
295                           txfm_param);
296 
297   for (int r = 0; r < h; ++r) {
298     for (int c = 0; c < w; ++c) {
299       dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
300     }
301   }
302 }
303 
// Applies the inverse transform of one block and adds it to dst. Returns
// immediately when eob is zero. Otherwise a TxfmParam is built with
// init_txfm_param and the call is routed to av1_highbd_inv_txfm_add when
// is_hbd is set, or to av1_inv_txfm_add when it is not.
void av1_inverse_transform_block(const MACROBLOCKD *xd,
                                 const tran_low_t *dqcoeff, int plane,
                                 TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
                                 int stride, int eob, int reduced_tx_set) {
  if (eob == 0) return;

  assert(eob <= av1_get_max_eob(tx_size));

  TxfmParam txfm_param;
  init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set,
                  &txfm_param);
  assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]);

  if (!txfm_param.is_hbd) {
    av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
    return;
  }
  av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
}
323