1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "./vp9_rtcd.h"
14 #include "vp9/common/vp9_enums.h"
15 #include "vpx_dsp/mips/inv_txfm_msa.h"
16
vp9_iht8x8_64_add_msa(const int16_t * input,uint8_t * dst,int32_t dst_stride,int32_t tx_type)17 void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
18 int32_t dst_stride, int32_t tx_type) {
19 v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
20
21 /* load vector elements of 8x8 block */
22 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
23
24 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
25 in4, in5, in6, in7);
26
27 switch (tx_type) {
28 case DCT_DCT:
29 /* DCT in horizontal */
30 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
31 in4, in5, in6, in7);
32 /* DCT in vertical */
33 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
34 in3, in4, in5, in6, in7);
35 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
36 in4, in5, in6, in7);
37 break;
38 case ADST_DCT:
39 /* DCT in horizontal */
40 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
41 in4, in5, in6, in7);
42 /* ADST in vertical */
43 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
44 in3, in4, in5, in6, in7);
45 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
46 in5, in6, in7);
47 break;
48 case DCT_ADST:
49 /* ADST in horizontal */
50 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
51 in5, in6, in7);
52 /* DCT in vertical */
53 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
54 in3, in4, in5, in6, in7);
55 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
56 in4, in5, in6, in7);
57 break;
58 case ADST_ADST:
59 /* ADST in horizontal */
60 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
61 in5, in6, in7);
62 /* ADST in vertical */
63 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
64 in3, in4, in5, in6, in7);
65 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
66 in5, in6, in7);
67 break;
68 default: assert(0); break;
69 }
70
71 /* final rounding (add 2^4, divide by 2^5) and shift */
72 SRARI_H4_SH(in0, in1, in2, in3, 5);
73 SRARI_H4_SH(in4, in5, in6, in7, 5);
74
75 /* add block and store 8x8 */
76 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
77 dst += (4 * dst_stride);
78 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
79 }
80