1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "vp9/common/vp9_enums.h"
14 #include "vpx_dsp/mips/inv_txfm_msa.h"
15
vp9_iht4x4_16_add_msa(const int16_t * input,uint8_t * dst,int32_t dst_stride,int32_t tx_type)16 void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
17 int32_t dst_stride, int32_t tx_type) {
18 v8i16 in0, in1, in2, in3;
19
20 /* load vector elements of 4x4 block */
21 LD4x4_SH(input, in0, in1, in2, in3);
22 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
23
24 switch (tx_type) {
25 case DCT_DCT:
26 /* DCT in horizontal */
27 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
28 /* DCT in vertical */
29 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
30 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
31 break;
32 case ADST_DCT:
33 /* DCT in horizontal */
34 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
35 /* ADST in vertical */
36 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
37 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
38 break;
39 case DCT_ADST:
40 /* ADST in horizontal */
41 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
42 /* DCT in vertical */
43 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
44 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
45 break;
46 case ADST_ADST:
47 /* ADST in horizontal */
48 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
49 /* ADST in vertical */
50 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
51 VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
52 break;
53 default:
54 assert(0);
55 break;
56 }
57
58 /* final rounding (add 2^3, divide by 2^4) and shift */
59 SRARI_H4_SH(in0, in1, in2, in3, 4);
60 /* add block and store 4x4 */
61 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
62 }
63