1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp8_rtcd.h"
12 #include "vp8/common/mips/msa/vp8_macros_msa.h"
13 #include "vp8/encoder/block.h"
14
vp8_block_error_msa(int16_t * coeff_ptr,int16_t * dq_coeff_ptr)15 int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr) {
16 int32_t err = 0;
17 uint32_t loop_cnt;
18 v8i16 coeff, dq_coeff, coeff0, coeff1;
19 v4i32 diff0, diff1;
20 v2i64 err0 = { 0 };
21 v2i64 err1 = { 0 };
22
23 for (loop_cnt = 2; loop_cnt--;) {
24 coeff = LD_SH(coeff_ptr);
25 dq_coeff = LD_SH(dq_coeff_ptr);
26 ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
27 HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
28 DPADD_SD2_SD(diff0, diff1, err0, err1);
29 coeff_ptr += 8;
30 dq_coeff_ptr += 8;
31 }
32
33 err0 += __msa_splati_d(err0, 1);
34 err1 += __msa_splati_d(err1, 1);
35 err = __msa_copy_s_d(err0, 0);
36 err += __msa_copy_s_d(err1, 0);
37
38 return err;
39 }
40
/* Total squared coefficient error over the 16 luma blocks of a macroblock.
 * When dc == 1 the DC (first) coefficient of every block is excluded from
 * the error, which is done by zeroing the first 32-bit difference lane via
 * a byte-select mask. Blocks are processed two at a time (8 loop passes).
 */
int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc) {
  BLOCK *be;
  BLOCKD *bd;
  int16_t *coeff_ptr, *dq_coeff_ptr;
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
  v4i32 diff0, diff1;
  v2i64 err0, err1;
  v16u8 zero = { 0 };
  /* All-ones byte mask: by default every difference lane is kept. */
  v16u8 mask0 = (v16u8)__msa_ldi_b(255);

  if (1 == dc) {
    /* Clear word 0 of the mask so the DC coefficient's difference is
     * dropped before the dot-product accumulation below. */
    mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
  }

  for (loop_cnt = 0; loop_cnt < 8; ++loop_cnt) {
    /* Load all 16 coefficients (and dequantized values) of block 2*i ... */
    be = &mb->block[2 * loop_cnt];
    bd = &mb->e_mbd.block[2 * loop_cnt];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff2 = LD_SH(coeff_ptr);
    dq_coeff2 = LD_SH(dq_coeff_ptr);
    /* ... and of block 2*i + 1. */
    be = &mb->block[2 * loop_cnt + 1];
    bd = &mb->e_mbd.block[2 * loop_cnt + 1];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff3 = LD_SH(coeff_ptr);
    dq_coeff3 = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff4 = LD_SH(coeff_ptr);
    dq_coeff4 = LD_SH(dq_coeff_ptr);
    /* First block: interleave, subtract, mask out the DC lane if
     * requested (the DC diff lives in word 0 of diff0), square and
     * accumulate into 64-bit lanes. */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    /* Second half of the first block: no masking — the DC coefficient
     * only occurs in the first 8 coefficients. */
    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    /* Horizontal reduction of both accumulators into the scalar total. */
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    /* Same sequence for the odd-numbered block of the pair. */
    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);
  }

  return err;
}
106
/* Total squared coefficient error over the chroma (U/V) blocks of a
 * macroblock — block indices 16..23. No DC masking here; every
 * coefficient contributes. Blocks are handled two per loop pass.
 */
int32_t vp8_mbuverror_msa(MACROBLOCK *mb) {
  BLOCK *be;
  BLOCKD *bd;
  int16_t *coeff_ptr, *dq_coeff_ptr;
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
  v4i32 diff0, diff1;
  v2i64 err0, err1, err_dup0, err_dup1;

  for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2) {
    /* Load the 16 coefficients (quantized + dequantized) of block i ... */
    be = &mb->block[loop_cnt];
    bd = &mb->e_mbd.block[loop_cnt];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff2 = LD_SH(coeff_ptr);
    dq_coeff2 = LD_SH(dq_coeff_ptr);
    /* ... and of block i + 1. */
    be = &mb->block[loop_cnt + 1];
    bd = &mb->e_mbd.block[loop_cnt + 1];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff3 = LD_SH(coeff_ptr);
    dq_coeff3 = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff4 = LD_SH(coeff_ptr);
    dq_coeff4 = LD_SH(dq_coeff_ptr);

    /* First block: interleave pairs, form differences, square and
     * accumulate into two 64-bit-lane accumulators. */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);

    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    /* Horizontal reduction: add the upper lane of each accumulator to
     * its lower lane, then extract into the scalar total. */
    err_dup0 = __msa_splati_d(err0, 1);
    err_dup1 = __msa_splati_d(err1, 1);
    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    /* Same sequence for the second block of the pair. */
    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err_dup0 = __msa_splati_d(err0, 1);
    err_dup1 = __msa_splati_d(err1, 1);
    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);
  }

  return err;
}
168