/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include <assert.h>
12 
13 #include "vp9/common/vp9_enums.h"
14 #include "vpx_dsp/mips/inv_txfm_msa.h"
15 
vp9_iht8x8_64_add_msa(const int16_t * input,uint8_t * dst,int32_t dst_stride,int32_t tx_type)16 void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
17                            int32_t dst_stride, int32_t tx_type) {
18   v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
19 
20   /* load vector elements of 8x8 block */
21   LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
22 
23   TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
24                      in0, in1, in2, in3, in4, in5, in6, in7);
25 
26   switch (tx_type) {
27     case DCT_DCT:
28       /* DCT in horizontal */
29       VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
30                      in0, in1, in2, in3, in4, in5, in6, in7);
31       /* DCT in vertical */
32       TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
33                          in0, in1, in2, in3, in4, in5, in6, in7);
34       VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
35                      in0, in1, in2, in3, in4, in5, in6, in7);
36       break;
37     case ADST_DCT:
38       /* DCT in horizontal */
39       VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
40                      in0, in1, in2, in3, in4, in5, in6, in7);
41       /* ADST in vertical */
42       TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
43                          in0, in1, in2, in3, in4, in5, in6, in7);
44       VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
45                 in0, in1, in2, in3, in4, in5, in6, in7);
46       break;
47     case DCT_ADST:
48       /* ADST in horizontal */
49       VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
50                 in0, in1, in2, in3, in4, in5, in6, in7);
51       /* DCT in vertical */
52       TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
53                          in0, in1, in2, in3, in4, in5, in6, in7);
54       VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
55                      in0, in1, in2, in3, in4, in5, in6, in7);
56       break;
57     case ADST_ADST:
58       /* ADST in horizontal */
59       VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
60                 in0, in1, in2, in3, in4, in5, in6, in7);
61       /* ADST in vertical */
62       TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
63                          in0, in1, in2, in3, in4, in5, in6, in7);
64       VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
65                 in0, in1, in2, in3, in4, in5, in6, in7);
66       break;
67     default:
68       assert(0);
69       break;
70   }
71 
72   /* final rounding (add 2^4, divide by 2^5) and shift */
73   SRARI_H4_SH(in0, in1, in2, in3, 5);
74   SRARI_H4_SH(in4, in5, in6, in7, 5);
75 
76   /* add block and store 8x8 */
77   VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
78   dst += (4 * dst_stride);
79   VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
80 }
81