# Prints the C forward declarations (struct/union tags only, no definitions)
# for the types that the prototypes registered in this file may mention.
# Everything between the heredoc markers is emitted verbatim by print.
sub vp8_common_forward_decls() {
print <<EOF
/*
 * VP8
 */

struct blockd;
struct macroblockd;
struct loop_filter_info;

/* Encoder forward decls */
struct block;
struct macroblock;
struct variance_vtable;
union int_mv;
struct yv12_buffer_config;
EOF
}
# Pass the emitter's name to forward_decls, which is supplied by the script
# that loads this file (it is not defined here).
forward_decls qw/vp8_common_forward_decls/;

#
# Dequant
#
# Each entry is an add_proto line (return type and function name, then the C
# parameter list) followed by a specialize line listing the per-target
# variants (mmx, sse2, neon, dspr2, msa, ...) named for that function.
# add_proto and specialize are provided by the loading script, not this file.
#
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
specialize qw/vp8_dequantize_b mmx neon msa/;

add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa/;

add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_y_block sse2 neon dspr2 msa/;

add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_uv_block sse2 neon dspr2 msa/;

#
# Loopfilter
#
# The first four entries share one parameter list (three plane pointers, two
# strides and a struct loop_filter_info pointer).
#
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbv sse2 neon dspr2 msa/;

add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bv sse2 neon dspr2 msa/;

add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbh sse2 neon dspr2 msa/;

add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bh sse2 neon dspr2 msa/;


# Simple loop filter entries.  After each specialize line, the scalar named
# <function>_<variant> is assigned a differently named symbol.
# NOTE(review): presumably the loading script uses that symbol in place of
# the default <function>_<variant> name -- confirm against the generator.
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbv sse2 neon msa/;
$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
$vp8_loop_filter_simple_mbv_msa=vp8_loop_filter_simple_vertical_edge_msa;

add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbh sse2 neon msa/;
$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
$vp8_loop_filter_simple_mbh_msa=vp8_loop_filter_simple_horizontal_edge_msa;

add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bv sse2 neon msa/;
$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
$vp8_loop_filter_simple_bv_msa=vp8_loop_filter_bvs_msa;

add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bh sse2 neon msa/;
$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
$vp8_loop_filter_simple_bh_msa=vp8_loop_filter_bhs_msa;

#
# IDCT
#
# The short tag above each entry (idct16, iwalsh1, ...) is kept from the
# original file as a label for the prototype that follows it.
#
#idct16
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/;

#iwalsh1
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;

#iwalsh16
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa/;

#idct1_scalar_add
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/;

#
# RECON
#
# Three block-copy prototypes with an identical parameter list; only the
# function name (16x16, 8x8, 8x4) and the listed variants differ.
#
add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem16x16 sse2 neon dspr2 msa/;

add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa/;

add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa/;

#
# Postproc
#
# Registered only when vpx_config reports CONFIG_POSTPROC as "yes".  The
# blend prototypes and vp8_filter_by_weight4x4 have no specialize line.
#
if (vpx_config("CONFIG_POSTPROC") eq "yes") {

    add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

    add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

    add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

    add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    specialize qw/vp8_filter_by_weight16x16 sse2 msa/;

    add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    specialize qw/vp8_filter_by_weight8x8 sse2 msa/;

    add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
}

#
# Subpixel
#
# Sixtap and bilinear prediction prototypes.  All eight share one parameter
# list; the entries differ in block size and in the variants listed.
#
add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict16x16 sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x8 sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x4 sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 neon dspr2 msa/;

add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict16x16 sse2 ssse3 neon msa/;

add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x8 sse2 ssse3 neon msa/;

add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x4 mmx neon msa/;

add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict4x4 mmx neon msa/;

#
# Encoder functions below this point.
#
# Everything inside this block is registered only when vpx_config reports
# CONFIG_VP8_ENCODER as "yes".
#
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {

#
# Block copy
#
# Registered only when $opts{arch} matches /x86/.
#
if ($opts{arch} =~ /x86/) {
    add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
    specialize qw/vp8_copy32xn sse2 sse3/;
}

#
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct4x4 sse2 neon msa/;

add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct8x4 sse2 neon msa/;

add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_walsh4x4 sse2 neon msa/;

#
# Quantizer
#
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;

add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;

#
# Block subtraction
#
add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
specialize qw/vp8_block_error sse2 msa/;

add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
specialize qw/vp8_mbblock_error sse2 msa/;

add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
specialize qw/vp8_mbuverror sse2 msa/;

#
# Motion search
#
# As in the simple loop filter entries, each <function>_<variant> scalar is
# assigned a differently named symbol.  Note that the sse2 and msa variants
# of the refining and diamond searches are both assigned the same sadx4 name.
# NOTE(review): how the loading script consumes these scalars is not visible
# here -- confirm against the generator.
#
add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_full_search_sad sse3 sse4_1/;
$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;

add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_refining_search_sad sse2 msa/;
$vp8_refining_search_sad_sse2=vp8_refining_search_sadx4;
$vp8_refining_search_sad_msa=vp8_refining_search_sadx4;

add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_diamond_search_sad sse2 msa/;
$vp8_diamond_search_sad_sse2=vp8_diamond_search_sadx4;
$vp8_diamond_search_sad_msa=vp8_diamond_search_sadx4;

#
# Alt-ref Noise Reduction (ARNR)
#
# Skipped when CONFIG_REALTIME_ONLY is "yes".
#
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
    specialize qw/vp8_temporal_filter_apply sse2 msa/;
}

#
# Denoiser filter
#
# Registered only when CONFIG_TEMPORAL_DENOISING is "yes".
#
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter sse2 neon msa/;
    add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter_uv sse2 neon msa/;
}

# End of encoder only functions
}
# A file loaded with require/do must finish with a true value.
1;