/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/block.h"

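/* Computes the sum of squared differences between the original and the
 * dequantized coefficients of a single block (16 coefficients). */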
int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr) {
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, dq_coeff, coeff0, coeff1;
  v4i32 diff0, diff1;
  v2i64 err0 = { 0 };
  v2i64 err1 = { 0 };

  /* Two iterations of 8 halfwords each cover the 16-coefficient block. */
  for (loop_cnt = 2; loop_cnt--;) {
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    /* Interleave the two vectors, form per-coefficient differences, and
     * accumulate their squares into four 64-bit partial sums. */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
  }

  /* Reduce the four 64-bit partial sums to a single scalar error. */
  err0 += __msa_splati_d(err0, 1);
  err1 += __msa_splati_d(err1, 1);
  err = __msa_copy_s_d(err0, 0);
  err += __msa_copy_s_d(err1, 0);

  return err;
}

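/* Sums the coefficient error over the 16 luma blocks of a macroblock,
 * processing two blocks per loop iteration.  When dc is 1, the DC
 * coefficient of each block is masked out of the sum (it is coded
 * separately in the Y2 block). */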
int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc) {
  BLOCK *be;
  BLOCKD *bd;
  int16_t *coeff_ptr, *dq_coeff_ptr;
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
  v4i32 diff0, diff1;
  v2i64 err0, err1;
  v16u8 zero = { 0 };
  v16u8 mask0 = (v16u8)__msa_ldi_b(255);

  /* When dc == 1, clear the first 32-bit lane of the mask so that the DC
   * difference is zeroed before it is squared and accumulated. */
  if (1 == dc) {
    mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
  }

  for (loop_cnt = 0; loop_cnt < 8; ++loop_cnt) {
    /* Load the 16 coefficients of two adjacent luma blocks. */
    be = &mb->block[2 * loop_cnt];
    bd = &mb->e_mbd.block[2 * loop_cnt];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff2 = LD_SH(coeff_ptr);
    dq_coeff2 = LD_SH(dq_coeff_ptr);
    be = &mb->block[2 * loop_cnt + 1];
    bd = &mb->e_mbd.block[2 * loop_cnt + 1];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff3 = LD_SH(coeff_ptr);
    dq_coeff3 = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff4 = LD_SH(coeff_ptr);
    dq_coeff4 = LD_SH(dq_coeff_ptr);

    /* First block: difference, optional DC masking, squared accumulate,
     * then horizontal reduction into the scalar error. */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    /* Second block: same sequence. */
    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);
  }

  return err;
}

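/* Sums the coefficient error over the 8 chroma blocks (U and V) of a
 * macroblock, which occupy block indices 16 through 23. */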
int32_t vp8_mbuverror_msa(MACROBLOCK *mb) {
  BLOCK *be;
  BLOCKD *bd;
  int16_t *coeff_ptr, *dq_coeff_ptr;
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
  v4i32 diff0, diff1;
  v2i64 err0, err1, err_dup0, err_dup1;

  for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2) {
    /* Load the 16 coefficients of two adjacent chroma blocks. */
    be = &mb->block[loop_cnt];
    bd = &mb->e_mbd.block[loop_cnt];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff2 = LD_SH(coeff_ptr);
    dq_coeff2 = LD_SH(dq_coeff_ptr);
    be = &mb->block[loop_cnt + 1];
    bd = &mb->e_mbd.block[loop_cnt + 1];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff3 = LD_SH(coeff_ptr);
    dq_coeff3 = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff4 = LD_SH(coeff_ptr);
    dq_coeff4 = LD_SH(dq_coeff_ptr);

    /* First block: squared-difference accumulate, then horizontal
     * reduction into the scalar error. */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);

    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err_dup0 = __msa_splati_d(err0, 1);
    err_dup1 = __msa_splati_d(err1, 1);
    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    /* Second block: same sequence. */
    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err_dup0 = __msa_splati_d(err0, 1);
    err_dup1 = __msa_splati_d(err1, 1);
    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);
  }

  return err;
}