1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <stdio.h>
13 
14 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h"
19 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
20 #include "vpx_dsp/txfm_common.h"
21 #include "vpx_ports/mem.h"
22 
23 #if HAVE_DSPR2
vp9_iht4x4_16_add_dspr2(const int16_t * input,uint8_t * dest,int stride,int tx_type)24 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride,
25                              int tx_type) {
26   int i, j;
27   DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
28   int16_t *outptr = out;
29   int16_t temp_in[4 * 4], temp_out[4];
30   uint32_t pos = 45;
31 
32   /* bit positon for extract from acc */
33   __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
34                        :
35                        : [pos] "r"(pos));
36 
37   switch (tx_type) {
38     case DCT_DCT:  // DCT in both horizontal and vertical
39       vpx_idct4_rows_dspr2(input, outptr);
40       vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride);
41       break;
42     case ADST_DCT:  // ADST in vertical, DCT in horizontal
43       vpx_idct4_rows_dspr2(input, outptr);
44 
45       outptr = out;
46 
47       for (i = 0; i < 4; ++i) {
48         iadst4_dspr2(outptr, temp_out);
49 
50         for (j = 0; j < 4; ++j)
51           dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) +
52                                             dest[j * stride + i]);
53 
54         outptr += 4;
55       }
56       break;
57     case DCT_ADST:  // DCT in vertical, ADST in horizontal
58       for (i = 0; i < 4; ++i) {
59         iadst4_dspr2(input, outptr);
60         input += 4;
61         outptr += 4;
62       }
63 
64       for (i = 0; i < 4; ++i) {
65         for (j = 0; j < 4; ++j) {
66           temp_in[i * 4 + j] = out[j * 4 + i];
67         }
68       }
69       vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, stride);
70       break;
71     case ADST_ADST:  // ADST in both directions
72       for (i = 0; i < 4; ++i) {
73         iadst4_dspr2(input, outptr);
74         input += 4;
75         outptr += 4;
76       }
77 
78       for (i = 0; i < 4; ++i) {
79         for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
80         iadst4_dspr2(temp_in, temp_out);
81 
82         for (j = 0; j < 4; ++j)
83           dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) +
84                                             dest[j * stride + i]);
85       }
86       break;
87     default: printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break;
88   }
89 }
90 #endif  // #if HAVE_DSPR2
91