1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <stdio.h>
13 
14 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h"
19 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
20 #include "vpx_dsp/txfm_common.h"
21 #include "vpx_ports/mem.h"
22 
23 #if HAVE_DSPR2
vp9_iht4x4_16_add_dspr2(const int16_t * input,uint8_t * dest,int dest_stride,int tx_type)24 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
25                              int dest_stride, int tx_type) {
26   int i, j;
27   DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
28   int16_t *outptr = out;
29   int16_t temp_in[4 * 4], temp_out[4];
30   uint32_t pos = 45;
31 
32   /* bit positon for extract from acc */
33   __asm__ __volatile__ (
34     "wrdsp      %[pos],     1           \n\t"
35     :
36     : [pos] "r" (pos)
37   );
38 
39   switch (tx_type) {
40     case DCT_DCT:   // DCT in both horizontal and vertical
41       vpx_idct4_rows_dspr2(input, outptr);
42       vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
43       break;
44     case ADST_DCT:  // ADST in vertical, DCT in horizontal
45       vpx_idct4_rows_dspr2(input, outptr);
46 
47       outptr = out;
48 
49       for (i = 0; i < 4; ++i) {
50         iadst4_dspr2(outptr, temp_out);
51 
52         for (j = 0; j < 4; ++j)
53           dest[j * dest_stride + i] =
54                     clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
55                                       + dest[j * dest_stride + i]);
56 
57         outptr += 4;
58       }
59       break;
60     case DCT_ADST:  // DCT in vertical, ADST in horizontal
61       for (i = 0; i < 4; ++i) {
62         iadst4_dspr2(input, outptr);
63         input  += 4;
64         outptr += 4;
65       }
66 
67       for (i = 0; i < 4; ++i) {
68         for (j = 0; j < 4; ++j) {
69           temp_in[i * 4 + j] = out[j * 4 + i];
70         }
71       }
72       vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
73       break;
74     case ADST_ADST:  // ADST in both directions
75       for (i = 0; i < 4; ++i) {
76         iadst4_dspr2(input, outptr);
77         input  += 4;
78         outptr += 4;
79       }
80 
81       for (i = 0; i < 4; ++i) {
82         for (j = 0; j < 4; ++j)
83           temp_in[j] = out[j * 4 + i];
84         iadst4_dspr2(temp_in, temp_out);
85 
86         for (j = 0; j < 4; ++j)
87           dest[j * dest_stride + i] =
88                   clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
89                                       + dest[j * dest_stride + i]);
90       }
91       break;
92     default:
93       printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n");
94       break;
95   }
96 }
97 #endif  // #if HAVE_DSPR2
98