1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "./vp9_rtcd.h"
14 #include "vp9/common/vp9_enums.h"
15 #include "vpx_dsp/mips/inv_txfm_msa.h"
16
vp9_iht4x4_16_add_msa(const int16_t * input,uint8_t * dst,int32_t dst_stride,int32_t tx_type)17 void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
18 int32_t dst_stride, int32_t tx_type) {
19 v8i16 in0, in1, in2, in3;
20
21 /* load vector elements of 4x4 block */
22 LD4x4_SH(input, in0, in1, in2, in3);
23 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
24
25 switch (tx_type) {
26 case DCT_DCT:
27 /* DCT in horizontal */
28 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
29 /* DCT in vertical */
30 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
31 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
32 break;
33 case ADST_DCT:
34 /* DCT in horizontal */
35 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
36 /* ADST in vertical */
37 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
38 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
39 break;
40 case DCT_ADST:
41 /* ADST in horizontal */
42 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
43 /* DCT in vertical */
44 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
45 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
46 break;
47 case ADST_ADST:
48 /* ADST in horizontal */
49 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
50 /* ADST in vertical */
51 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
52 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
53 break;
54 default: assert(0); break;
55 }
56
57 /* final rounding (add 2^3, divide by 2^4) and shift */
58 SRARI_H4_SH(in0, in1, in2, in3, 4);
59 /* add block and store 4x4 */
60 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
61 }
62