1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp8_rtcd.h"
12 #include "vp8/common/postproc.h"
13 #include "vp8/common/mips/msa/vp8_macros_msa.h"
14
filter_by_weight8x8_msa(uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,int32_t src_weight)15 static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
16 uint8_t *dst_ptr, int32_t dst_stride,
17 int32_t src_weight)
18 {
19 int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
20 int32_t row;
21 uint64_t src0_d, src1_d, dst0_d, dst1_d;
22 v16i8 src0 = { 0 };
23 v16i8 src1 = { 0 };
24 v16i8 dst0 = { 0 };
25 v16i8 dst1 = { 0 };
26 v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
27
28 src_wt = __msa_fill_h(src_weight);
29 dst_wt = __msa_fill_h(dst_weight);
30
31 for (row = 2; row--;)
32 {
33 LD2(src_ptr, src_stride, src0_d, src1_d);
34 src_ptr += (2 * src_stride);
35 LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
36 INSERT_D2_SB(src0_d, src1_d, src0);
37 INSERT_D2_SB(dst0_d, dst1_d, dst0);
38
39 LD2(src_ptr, src_stride, src0_d, src1_d);
40 src_ptr += (2 * src_stride);
41 LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
42 INSERT_D2_SB(src0_d, src1_d, src1);
43 INSERT_D2_SB(dst0_d, dst1_d, dst1);
44
45 UNPCK_UB_SH(src0, src_r, src_l);
46 UNPCK_UB_SH(dst0, dst_r, dst_l);
47 res_h_r = (src_r * src_wt);
48 res_h_r += (dst_r * dst_wt);
49 res_h_l = (src_l * src_wt);
50 res_h_l += (dst_l * dst_wt);
51 SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
52 dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
53 ST8x2_UB(dst0, dst_ptr, dst_stride);
54 dst_ptr += (2 * dst_stride);
55
56 UNPCK_UB_SH(src1, src_r, src_l);
57 UNPCK_UB_SH(dst1, dst_r, dst_l);
58 res_h_r = (src_r * src_wt);
59 res_h_r += (dst_r * dst_wt);
60 res_h_l = (src_l * src_wt);
61 res_h_l += (dst_l * dst_wt);
62 SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
63 dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
64 ST8x2_UB(dst1, dst_ptr, dst_stride);
65 dst_ptr += (2 * dst_stride);
66 }
67 }
68
filter_by_weight16x16_msa(uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,int32_t src_weight)69 static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
70 uint8_t *dst_ptr, int32_t dst_stride,
71 int32_t src_weight)
72 {
73 int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
74 int32_t row;
75 v16i8 src0, src1, src2, src3;
76 v16i8 dst0, dst1, dst2, dst3;
77 v8i16 src_wt, dst_wt;
78 v8i16 res_h_r, res_h_l;
79 v8i16 src_r, src_l, dst_r, dst_l;
80
81 src_wt = __msa_fill_h(src_weight);
82 dst_wt = __msa_fill_h(dst_weight);
83
84 for (row = 4; row--;)
85 {
86 LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
87 src_ptr += (4 * src_stride);
88 LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
89
90 UNPCK_UB_SH(src0, src_r, src_l);
91 UNPCK_UB_SH(dst0, dst_r, dst_l);
92 res_h_r = (src_r * src_wt);
93 res_h_r += (dst_r * dst_wt);
94 res_h_l = (src_l * src_wt);
95 res_h_l += (dst_l * dst_wt);
96 SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
97 PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
98 dst_ptr += dst_stride;
99
100 UNPCK_UB_SH(src1, src_r, src_l);
101 UNPCK_UB_SH(dst1, dst_r, dst_l);
102 res_h_r = (src_r * src_wt);
103 res_h_r += (dst_r * dst_wt);
104 res_h_l = (src_l * src_wt);
105 res_h_l += (dst_l * dst_wt);
106 SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
107 PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
108 dst_ptr += dst_stride;
109
110 UNPCK_UB_SH(src2, src_r, src_l);
111 UNPCK_UB_SH(dst2, dst_r, dst_l);
112 res_h_r = (src_r * src_wt);
113 res_h_r += (dst_r * dst_wt);
114 res_h_l = (src_l * src_wt);
115 res_h_l += (dst_l * dst_wt);
116 SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
117 PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
118 dst_ptr += dst_stride;
119
120 UNPCK_UB_SH(src3, src_r, src_l);
121 UNPCK_UB_SH(dst3, dst_r, dst_l);
122 res_h_r = (src_r * src_wt);
123 res_h_r += (dst_r * dst_wt);
124 res_h_l = (src_l * src_wt);
125 res_h_l += (dst_l * dst_wt);
126 SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
127 PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
128 dst_ptr += dst_stride;
129 }
130 }
131
/* Exported MSA entry point for the 16x16 weighted blend.
 *
 * Passes all arguments through, unchanged, to the file-local
 * filter_by_weight16x16_msa(), which modifies dst_ptr in place.
 */
void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
                                   uint8_t *dst_ptr, int32_t dst_stride,
                                   int32_t src_weight)
{
    filter_by_weight16x16_msa(src_ptr, src_stride,
                              dst_ptr, dst_stride,
                              src_weight);
}
139
/* Exported MSA entry point for the 8x8 weighted blend.
 *
 * Passes all arguments through, unchanged, to the file-local
 * filter_by_weight8x8_msa(), which modifies dst_ptr in place.
 */
void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
                                 uint8_t *dst_ptr, int32_t dst_stride,
                                 int32_t src_weight)
{
    filter_by_weight8x8_msa(src_ptr, src_stride,
                            dst_ptr, dst_stride,
                            src_weight);
}
147