#
# VP8 run-time CPU detection (RTCD) definitions.
#
# This script is a flat list of declarations written against helpers that
# are NOT defined in this file: add_proto, specialize, forward_decls,
# vpx_config and the %opts hash must be provided by whatever loads it.
# Each entry pairs an add_proto (return type, function name, C parameter
# list) with an optional specialize line naming the instruction-set
# variants listed for that function.
#
# NOTE(review): the "$name_variant=symbol;" assignments below use bareword
# right-hand sides, which only parse as strings when 'strict subs' is off
# in the loader. They presumably remap a variant to a differently named
# implementation symbol -- confirm against the loader before changing them.
#

# Prints the C forward declarations for the struct/union types that the
# prototype strings below refer to by name.
sub vp8_common_forward_decls() {
print <<EOF
/*
 * VP8
 */

struct blockd;
struct macroblockd;
struct loop_filter_info;

/* Encoder forward decls */
struct block;
struct macroblock;
struct variance_vtable;
union int_mv;
struct yv12_buffer_config;
EOF
}
forward_decls qw/vp8_common_forward_decls/;

#
# Dequant
#
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
specialize qw/vp8_dequantize_b mmx neon msa/;

add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa/;

add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_y_block sse2 neon dspr2 msa/;

add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_uv_block sse2 neon dspr2 msa/;

#
# Loopfilter
#
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbv sse2 neon dspr2 msa/;

add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bv sse2 neon dspr2 msa/;

add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbh sse2 neon dspr2 msa/;

add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bh sse2 neon dspr2 msa/;


# Simple loop filter: every variant, including the C one, is explicitly
# assigned a symbol name that differs from the "<function>_<variant>" pattern.
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbv sse2 neon msa/;
$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
$vp8_loop_filter_simple_mbv_msa=vp8_loop_filter_simple_vertical_edge_msa;

add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbh sse2 neon msa/;
$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
$vp8_loop_filter_simple_mbh_msa=vp8_loop_filter_simple_horizontal_edge_msa;

add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bv sse2 neon msa/;
$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
$vp8_loop_filter_simple_bv_msa=vp8_loop_filter_bvs_msa;

add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bh sse2 neon msa/;
$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
$vp8_loop_filter_simple_bh_msa=vp8_loop_filter_bhs_msa;

#
# IDCT
#
#idct16
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/;

#iwalsh1
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;

#iwalsh16
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa/;

#idct1_scalar_add
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/;

#
# RECON
#
add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem16x16 sse2 neon dspr2 msa/;

add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa/;

add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa/;

#
# Postproc
#
# Declared only when CONFIG_POSTPROC is "yes". The blend functions and
# vp8_filter_by_weight4x4 have no specialize line.
if (vpx_config("CONFIG_POSTPROC") eq "yes") {

    add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

    add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

    add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

    add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    specialize qw/vp8_filter_by_weight16x16 sse2 msa/;

    add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    specialize qw/vp8_filter_by_weight8x8 sse2 msa/;

    add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
}

#
# Subpixel
#
add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict16x16 sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x8 sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x4 sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 neon dspr2 msa/;

add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict16x16 sse2 ssse3 neon msa/;

add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x8 sse2 ssse3 neon msa/;

add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x4 mmx neon msa/;

add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict4x4 mmx neon msa/;

#
# Encoder functions below this point.
#
# Everything up to the matching closing brace is declared only when
# CONFIG_VP8_ENCODER is "yes".
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {

#
# Block copy
#
# Declared only when $opts{arch} matches /x86/.
if ($opts{arch} =~ /x86/) {
    add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
    specialize qw/vp8_copy32xn sse2 sse3/;
}

#
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct4x4 sse2 neon msa/;

add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct8x4 sse2 neon msa/;

add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_walsh4x4 sse2 neon msa/;

#
# Quantizer
#
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;

add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;

#
# Block subtraction
#
add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
specialize qw/vp8_block_error sse2 msa/;

add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
specialize qw/vp8_mbblock_error sse2 msa/;

add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
specialize qw/vp8_mbuverror sse2 msa/;

#
# Motion search
#
# The specialized variants below are assigned symbol names (sadx3 / sadx8 /
# sadx4) that do not carry an instruction-set suffix; the sse2 and msa
# variants of the refining and diamond searches share the same sadx4 symbol.
add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_full_search_sad sse3 sse4_1/;
$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;

add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_refining_search_sad sse2 msa/;
$vp8_refining_search_sad_sse2=vp8_refining_search_sadx4;
$vp8_refining_search_sad_msa=vp8_refining_search_sadx4;

add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_diamond_search_sad sse2 msa/;
$vp8_diamond_search_sad_sse2=vp8_diamond_search_sadx4;
$vp8_diamond_search_sad_msa=vp8_diamond_search_sadx4;

#
# Alt-ref Noise Reduction (ARNR)
#
# Declared unless CONFIG_REALTIME_ONLY is "yes".
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
    specialize qw/vp8_temporal_filter_apply sse2 msa/;
}

#
# Denoiser filter
#
# Declared only when CONFIG_TEMPORAL_DENOISING is "yes".
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter sse2 neon msa/;
    add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter_uv sse2 neon msa/;
}

# End of encoder only functions
}
# True value as the script's final statement, so a loader that checks the
# result of evaluating this file sees success.
1;