1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12
13 #include "./vp9_rtcd.h"
14 #include "./vpx_dsp_rtcd.h"
15 #include "vp9/common/vp9_blockd.h"
16 #include "vp9/common/vp9_idct.h"
17 #include "vpx_dsp/inv_txfm.h"
18 #include "vpx_ports/mem.h"
19
vp9_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21 int tx_type) {
22 const transform_2d IHT_4[] = {
23 { idct4_c, idct4_c }, // DCT_DCT = 0
24 { iadst4_c, idct4_c }, // ADST_DCT = 1
25 { idct4_c, iadst4_c }, // DCT_ADST = 2
26 { iadst4_c, iadst4_c } // ADST_ADST = 3
27 };
28
29 int i, j;
30 tran_low_t out[4 * 4];
31 tran_low_t *outptr = out;
32 tran_low_t temp_in[4], temp_out[4];
33
34 // inverse transform row vectors
35 for (i = 0; i < 4; ++i) {
36 IHT_4[tx_type].rows(input, outptr);
37 input += 4;
38 outptr += 4;
39 }
40
41 // inverse transform column vectors
42 for (i = 0; i < 4; ++i) {
43 for (j = 0; j < 4; ++j)
44 temp_in[j] = out[j * 4 + i];
45 IHT_4[tx_type].cols(temp_in, temp_out);
46 for (j = 0; j < 4; ++j) {
47 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
48 ROUND_POWER_OF_TWO(temp_out[j], 4));
49 }
50 }
51 }
52
53 static const transform_2d IHT_8[] = {
54 { idct8_c, idct8_c }, // DCT_DCT = 0
55 { iadst8_c, idct8_c }, // ADST_DCT = 1
56 { idct8_c, iadst8_c }, // DCT_ADST = 2
57 { iadst8_c, iadst8_c } // ADST_ADST = 3
58 };
59
vp9_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)60 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
61 int tx_type) {
62 int i, j;
63 tran_low_t out[8 * 8];
64 tran_low_t *outptr = out;
65 tran_low_t temp_in[8], temp_out[8];
66 const transform_2d ht = IHT_8[tx_type];
67
68 // inverse transform row vectors
69 for (i = 0; i < 8; ++i) {
70 ht.rows(input, outptr);
71 input += 8;
72 outptr += 8;
73 }
74
75 // inverse transform column vectors
76 for (i = 0; i < 8; ++i) {
77 for (j = 0; j < 8; ++j)
78 temp_in[j] = out[j * 8 + i];
79 ht.cols(temp_in, temp_out);
80 for (j = 0; j < 8; ++j) {
81 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
82 ROUND_POWER_OF_TWO(temp_out[j], 5));
83 }
84 }
85 }
86
87 static const transform_2d IHT_16[] = {
88 { idct16_c, idct16_c }, // DCT_DCT = 0
89 { iadst16_c, idct16_c }, // ADST_DCT = 1
90 { idct16_c, iadst16_c }, // DCT_ADST = 2
91 { iadst16_c, iadst16_c } // ADST_ADST = 3
92 };
93
vp9_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)94 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
95 int tx_type) {
96 int i, j;
97 tran_low_t out[16 * 16];
98 tran_low_t *outptr = out;
99 tran_low_t temp_in[16], temp_out[16];
100 const transform_2d ht = IHT_16[tx_type];
101
102 // Rows
103 for (i = 0; i < 16; ++i) {
104 ht.rows(input, outptr);
105 input += 16;
106 outptr += 16;
107 }
108
109 // Columns
110 for (i = 0; i < 16; ++i) {
111 for (j = 0; j < 16; ++j)
112 temp_in[j] = out[j * 16 + i];
113 ht.cols(temp_in, temp_out);
114 for (j = 0; j < 16; ++j) {
115 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
116 ROUND_POWER_OF_TWO(temp_out[j], 6));
117 }
118 }
119 }
120
121 // idct
// Picks the 4x4 inverse DCT variant from eob: values above 1 use the full
// 16-coefficient routine, everything else uses the single-coefficient one.
// input, dest and stride are forwarded unchanged.
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_idct4x4_1_add(input, dest, stride);
    return;
  }
  vpx_idct4x4_16_add(input, dest, stride);
}
129
130
// Picks the 4x4 inverse WHT variant from eob: values above 1 use the
// 16-coefficient routine, everything else uses the single-coefficient one.
// input, dest and stride are forwarded unchanged.
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (!(eob > 1)) {
    vpx_iwht4x4_1_add(input, dest, stride);
  } else {
    vpx_iwht4x4_16_add(input, dest, stride);
  }
}
138
// Picks the cheapest 8x8 inverse DCT variant that eob allows:
//   eob == 1            -> vpx_idct8x8_1_add  (DC-only coefficient)
//   eob <= 12 otherwise -> vpx_idct8x8_12_add
//   anything larger     -> vpx_idct8x8_64_add
// input, dest and stride are forwarded unchanged.
//
// Original author's note: if dc is 1, input[0] is the reconstructed value
// and needs no dequantization; when dc is 1 it is counted in eob, so
// eob >= 1.
// TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
// Combine that with code here.
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    vpx_idct8x8_1_add(input, dest, stride);
    return;
  }
  if (eob <= 12) {
    vpx_idct8x8_12_add(input, dest, stride);
    return;
  }
  vpx_idct8x8_64_add(input, dest, stride);
}
156
// Picks the cheapest 16x16 inverse DCT variant that eob allows:
//   eob == 1            -> vpx_idct16x16_1_add  (DC-only coefficient)
//   eob <= 10 otherwise -> vpx_idct16x16_10_add
//   anything larger     -> vpx_idct16x16_256_add
// input, dest and stride are forwarded unchanged.
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    vpx_idct16x16_1_add(input, dest, stride);
  } else if (eob > 10) {
    vpx_idct16x16_256_add(input, dest, stride);
  } else {
    vpx_idct16x16_10_add(input, dest, stride);
  }
}
169
// Picks the cheapest 32x32 inverse DCT variant that eob allows:
//   eob == 1            -> vpx_idct32x32_1_add
//   eob <= 34 otherwise -> vpx_idct32x32_34_add
//   anything larger     -> vpx_idct32x32_1024_add
// input, dest and stride are forwarded unchanged.
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    vpx_idct32x32_1_add(input, dest, stride);
    return;
  }
  if (eob <= 34) {
    // Non-zero coefficients only in the upper-left 8x8.
    vpx_idct32x32_34_add(input, dest, stride);
    return;
  }
  vpx_idct32x32_1024_add(input, dest, stride);
}
180
181 // iht
// 4x4 inverse transform entry point. DCT_DCT goes to vp9_idct4x4_add(),
// which selects a variant from eob; every other tx_type goes to the hybrid
// transform vp9_iht4x4_16_add(), which does not take eob.
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
189
// 8x8 inverse transform entry point. DCT_DCT goes to vp9_idct8x8_add(),
// which selects a variant from eob; every other tx_type goes to the hybrid
// transform vp9_iht8x8_64_add(), which does not take eob.
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht8x8_64_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct8x8_add(input, dest, stride, eob);
}
198
// 16x16 inverse transform entry point. DCT_DCT goes to vp9_idct16x16_add(),
// which selects a variant from eob; every other tx_type goes to the hybrid
// transform vp9_iht16x16_256_add(), which does not take eob.
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht16x16_256_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct16x16_add(input, dest, stride, eob);
}
207
208 #if CONFIG_VP9_HIGHBITDEPTH
vp9_highbd_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest8,int stride,int tx_type,int bd)209 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
210 int stride, int tx_type, int bd) {
211 const highbd_transform_2d IHT_4[] = {
212 { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
213 { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
214 { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
215 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
216 };
217 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
218
219 int i, j;
220 tran_low_t out[4 * 4];
221 tran_low_t *outptr = out;
222 tran_low_t temp_in[4], temp_out[4];
223
224 // Inverse transform row vectors.
225 for (i = 0; i < 4; ++i) {
226 IHT_4[tx_type].rows(input, outptr, bd);
227 input += 4;
228 outptr += 4;
229 }
230
231 // Inverse transform column vectors.
232 for (i = 0; i < 4; ++i) {
233 for (j = 0; j < 4; ++j)
234 temp_in[j] = out[j * 4 + i];
235 IHT_4[tx_type].cols(temp_in, temp_out, bd);
236 for (j = 0; j < 4; ++j) {
237 dest[j * stride + i] = highbd_clip_pixel_add(
238 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
239 }
240 }
241 }
242
243 static const highbd_transform_2d HIGH_IHT_8[] = {
244 { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
245 { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
246 { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
247 { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
248 };
249
vp9_highbd_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest8,int stride,int tx_type,int bd)250 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
251 int stride, int tx_type, int bd) {
252 int i, j;
253 tran_low_t out[8 * 8];
254 tran_low_t *outptr = out;
255 tran_low_t temp_in[8], temp_out[8];
256 const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
257 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
258
259 // Inverse transform row vectors.
260 for (i = 0; i < 8; ++i) {
261 ht.rows(input, outptr, bd);
262 input += 8;
263 outptr += 8;
264 }
265
266 // Inverse transform column vectors.
267 for (i = 0; i < 8; ++i) {
268 for (j = 0; j < 8; ++j)
269 temp_in[j] = out[j * 8 + i];
270 ht.cols(temp_in, temp_out, bd);
271 for (j = 0; j < 8; ++j) {
272 dest[j * stride + i] = highbd_clip_pixel_add(
273 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
274 }
275 }
276 }
277
278 static const highbd_transform_2d HIGH_IHT_16[] = {
279 { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
280 { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
281 { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
282 { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
283 };
284
vp9_highbd_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest8,int stride,int tx_type,int bd)285 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
286 int stride, int tx_type, int bd) {
287 int i, j;
288 tran_low_t out[16 * 16];
289 tran_low_t *outptr = out;
290 tran_low_t temp_in[16], temp_out[16];
291 const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
293
294 // Rows
295 for (i = 0; i < 16; ++i) {
296 ht.rows(input, outptr, bd);
297 input += 16;
298 outptr += 16;
299 }
300
301 // Columns
302 for (i = 0; i < 16; ++i) {
303 for (j = 0; j < 16; ++j)
304 temp_in[j] = out[j * 16 + i];
305 ht.cols(temp_in, temp_out, bd);
306 for (j = 0; j < 16; ++j) {
307 dest[j * stride + i] = highbd_clip_pixel_add(
308 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
309 }
310 }
311 }
312
313 // idct
// Picks the high-bitdepth 4x4 inverse DCT variant from eob: values above 1
// use the 16-coefficient routine, everything else uses the single-coefficient
// one. input, dest, stride and bd are forwarded unchanged.
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
}
321
322
// Picks the high-bitdepth 4x4 inverse WHT variant from eob: values above 1
// use the 16-coefficient routine, everything else uses the single-coefficient
// one. input, dest, stride and bd are forwarded unchanged.
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (!(eob > 1)) {
    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
  } else {
    vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
  }
}
330
// Picks the cheapest high-bitdepth 8x8 inverse DCT variant that eob allows:
//   eob == 1            -> vpx_highbd_idct8x8_1_add  (DC-only coefficient)
//   eob <= 10 otherwise -> vpx_highbd_idct8x8_10_add
//   anything larger     -> vpx_highbd_idct8x8_64_add
// input, dest, stride and bd are forwarded unchanged.
//
// Original author's note: if dc is 1, input[0] is the reconstructed value
// and needs no dequantization; when dc is 1 it is counted in eob, so
// eob >= 1.
// TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
// Combine that with code here.
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 10) {
    vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
}
349
// Picks the cheapest high-bitdepth 16x16 inverse DCT variant that eob allows:
//   eob == 1            -> vpx_highbd_idct16x16_1_add  (DC-only coefficient)
//   eob <= 10 otherwise -> vpx_highbd_idct16x16_10_add
//   anything larger     -> vpx_highbd_idct16x16_256_add
// input, dest, stride and bd are forwarded unchanged.
void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
  } else if (eob > 10) {
    vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
  } else {
    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
  }
}
363
// Picks the cheapest high-bitdepth 32x32 inverse DCT variant that eob allows:
//   eob == 1            -> vpx_highbd_idct32x32_1_add
//   eob <= 34 otherwise -> vpx_highbd_idct32x32_34_add
//   anything larger     -> vpx_highbd_idct32x32_1024_add
// input, dest, stride and bd are forwarded unchanged.
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 34) {
    // Non-zero coefficients only in the upper-left 8x8.
    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
}
375
376 // iht
// High-bitdepth 4x4 inverse transform entry point. DCT_DCT goes to
// vp9_highbd_idct4x4_add(), which selects a variant from eob; every other
// tx_type goes to the hybrid transform vp9_highbd_iht4x4_16_add(), which does
// not take eob.
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint8_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
}
384
// High-bitdepth 8x8 inverse transform entry point. DCT_DCT goes to
// vp9_highbd_idct8x8_add(), which selects a variant from eob; every other
// tx_type goes to the hybrid transform vp9_highbd_iht8x8_64_add(), which does
// not take eob.
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint8_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
}
393
// High-bitdepth 16x16 inverse transform entry point. DCT_DCT goes to
// vp9_highbd_idct16x16_add(), which selects a variant from eob; every other
// tx_type goes to the hybrid transform vp9_highbd_iht16x16_256_add(), which
// does not take eob.
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                             uint8_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
}
402 #endif // CONFIG_VP9_HIGHBITDEPTH
403