/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include <assert.h>
12
13 #include "vp9/common/vp9_enums.h"
14 #include "vpx_dsp/mips/inv_txfm_msa.h"
15
vp9_iht8x8_64_add_msa(const int16_t * input,uint8_t * dst,int32_t dst_stride,int32_t tx_type)16 void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
17 int32_t dst_stride, int32_t tx_type) {
18 v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
19
20 /* load vector elements of 8x8 block */
21 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
22
23 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
24 in0, in1, in2, in3, in4, in5, in6, in7);
25
26 switch (tx_type) {
27 case DCT_DCT:
28 /* DCT in horizontal */
29 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
30 in0, in1, in2, in3, in4, in5, in6, in7);
31 /* DCT in vertical */
32 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
33 in0, in1, in2, in3, in4, in5, in6, in7);
34 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
35 in0, in1, in2, in3, in4, in5, in6, in7);
36 break;
37 case ADST_DCT:
38 /* DCT in horizontal */
39 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
40 in0, in1, in2, in3, in4, in5, in6, in7);
41 /* ADST in vertical */
42 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
43 in0, in1, in2, in3, in4, in5, in6, in7);
44 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
45 in0, in1, in2, in3, in4, in5, in6, in7);
46 break;
47 case DCT_ADST:
48 /* ADST in horizontal */
49 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
50 in0, in1, in2, in3, in4, in5, in6, in7);
51 /* DCT in vertical */
52 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
53 in0, in1, in2, in3, in4, in5, in6, in7);
54 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
55 in0, in1, in2, in3, in4, in5, in6, in7);
56 break;
57 case ADST_ADST:
58 /* ADST in horizontal */
59 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
60 in0, in1, in2, in3, in4, in5, in6, in7);
61 /* ADST in vertical */
62 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
63 in0, in1, in2, in3, in4, in5, in6, in7);
64 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
65 in0, in1, in2, in3, in4, in5, in6, in7);
66 break;
67 default:
68 assert(0);
69 break;
70 }
71
72 /* final rounding (add 2^4, divide by 2^5) and shift */
73 SRARI_H4_SH(in0, in1, in2, in3, 5);
74 SRARI_H4_SH(in4, in5, in6, in7, 5);
75
76 /* add block and store 8x8 */
77 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
78 dst += (4 * dst_stride);
79 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
80 }
81