1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <stdio.h>
13 
14 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h"
19 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
20 #include "vpx_dsp/txfm_common.h"
21 #include "vpx_ports/mem.h"
22 
23 #if HAVE_DSPR2
vp9_iht16x16_256_add_dspr2(const int16_t * input,uint8_t * dest,int pitch,int tx_type)24 void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
25                                 int pitch, int tx_type) {
26   int i, j;
27   DECLARE_ALIGNED(32, int16_t,  out[16 * 16]);
28   int16_t *outptr = out;
29   int16_t temp_out[16];
30   uint32_t pos = 45;
31 
32   /* bit positon for extract from acc */
33   __asm__ __volatile__ (
34     "wrdsp    %[pos],    1    \n\t"
35     :
36     : [pos] "r" (pos)
37   );
38 
39   switch (tx_type) {
40     case DCT_DCT:     // DCT in both horizontal and vertical
41       idct16_rows_dspr2(input, outptr, 16);
42       idct16_cols_add_blk_dspr2(out, dest, pitch);
43       break;
44     case ADST_DCT:    // ADST in vertical, DCT in horizontal
45       idct16_rows_dspr2(input, outptr, 16);
46 
47       outptr = out;
48 
49       for (i = 0; i < 16; ++i) {
50         iadst16_dspr2(outptr, temp_out);
51 
52         for (j = 0; j < 16; ++j)
53           dest[j * pitch + i] =
54                     clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
55                                       + dest[j * pitch + i]);
56         outptr += 16;
57       }
58       break;
59     case DCT_ADST:    // DCT in vertical, ADST in horizontal
60     {
61       int16_t temp_in[16 * 16];
62 
63       for (i = 0; i < 16; ++i) {
64         /* prefetch row */
65         prefetch_load((const uint8_t *)(input + 16));
66 
67         iadst16_dspr2(input, outptr);
68         input += 16;
69         outptr += 16;
70       }
71 
72       for (i = 0; i < 16; ++i)
73         for (j = 0; j < 16; ++j)
74             temp_in[j * 16 + i] = out[i * 16 + j];
75 
76       idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
77     }
78     break;
79     case ADST_ADST:   // ADST in both directions
80     {
81       int16_t temp_in[16];
82 
83       for (i = 0; i < 16; ++i) {
84         /* prefetch row */
85         prefetch_load((const uint8_t *)(input + 16));
86 
87         iadst16_dspr2(input, outptr);
88         input += 16;
89         outptr += 16;
90       }
91 
92       for (i = 0; i < 16; ++i) {
93         for (j = 0; j < 16; ++j)
94           temp_in[j] = out[j * 16 + i];
95         iadst16_dspr2(temp_in, temp_out);
96         for (j = 0; j < 16; ++j)
97           dest[j * pitch + i] =
98                     clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
99                                       + dest[j * pitch + i]);
100       }
101     }
102     break;
103     default:
104       printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n");
105       break;
106   }
107 }
108 #endif  // #if HAVE_DSPR2
109