#
# VP8 function prototype / specialization definitions.
#
# Each stanza declares one function with add_proto (return type, name,
# argument list) and then lists, via specialize, the instruction-set
# flavours for which a dedicated version is named.  Assignments of the form
#   $<function>_<flavour>=<symbol>;
# appear to name the symbol implementing that flavour when it differs from
# the default <function>_<flavour> -- confirm against the loader script.
#
# add_proto, specialize, forward_decls, vpx_config and %opts are not defined
# in this file; they are expected to be provided by the script that loads it.
#

# Prints the C forward declarations for the struct/union types that are
# referenced by the prototypes below.
sub vp8_common_forward_decls() {
print <<EOF
/*
 * VP8
 */

struct blockd;
struct macroblockd;
struct loop_filter_info;

/* Encoder forward decls */
struct block;
struct macroblock;
struct variance_vtable;
union int_mv;
struct yv12_buffer_config;
EOF
}
forward_decls qw/vp8_common_forward_decls/;

#
# system state
#
add_proto qw/void vp8_clear_system_state/, "";
specialize qw/vp8_clear_system_state mmx/;
$vp8_clear_system_state_mmx=vpx_reset_mmx_state;

#
# Dequant
#
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
specialize qw/vp8_dequantize_b mmx media neon msa/;
$vp8_dequantize_b_media=vp8_dequantize_b_v6;

add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
specialize qw/vp8_dequant_idct_add mmx media neon dspr2 msa/;
$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;

add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2 msa/;
$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;

add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2 msa/;
$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
# The dspr2 entry must name the uv_block function of this stanza (it used to
# repeat the y_block assignment from the stanza above).
$vp8_dequant_idct_add_uv_block_dspr2=vp8_dequant_idct_add_uv_block_dspr2;

#
# Loopfilter
#
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;

add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;

add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;

add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;


# Simple loop filter: every flavour (including the C one) is mapped to a
# symbol whose name differs from the prototype name.
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon msa/;
$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
$vp8_loop_filter_simple_mbv_msa=vp8_loop_filter_simple_vertical_edge_msa;

add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon msa/;
$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
$vp8_loop_filter_simple_mbh_msa=vp8_loop_filter_simple_horizontal_edge_msa;

add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon msa/;
$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
$vp8_loop_filter_simple_bv_msa=vp8_loop_filter_bvs_msa;

add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon msa/;
$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
$vp8_loop_filter_simple_bh_msa=vp8_loop_filter_bhs_msa;

#
# IDCT
#
#idct16
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
specialize qw/vp8_short_idct4x4llm mmx media neon dspr2 msa/;
$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;

#iwalsh1
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
# dspr2 is the only specialization listed for this function

#iwalsh16
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2 msa/;
$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;

#idct1_scalar_add
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
specialize qw/vp8_dc_only_idct_add mmx media neon dspr2 msa/;
$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;

#
# RECON
#
add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2 msa/;
$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;

add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x8 mmx media neon dspr2 msa/;
$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;

add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;

#
# Postproc
#
# Only declared when CONFIG_POSTPROC is "yes".
if (vpx_config("CONFIG_POSTPROC") eq "yes") {
    add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
    specialize qw/vp8_mbpost_proc_down mmx sse2 msa/;
    $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;

    add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
    specialize qw/vp8_mbpost_proc_across_ip sse2 msa/;
    $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;

    add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
    specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;

    add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
    specialize qw/vp8_plane_add_noise mmx sse2 msa/;
    $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;

    add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
    # no asm yet

    add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
    # no asm yet

    add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
    # no asm yet

    add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    specialize qw/vp8_filter_by_weight16x16 sse2 msa/;

    add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    specialize qw/vp8_filter_by_weight8x8 sse2 msa/;

    add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    # no asm yet
}

#
# Subpixel
#
add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2 msa/;
$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;

add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2 msa/;
$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;

add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2 msa/;
$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;

add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/;
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;

add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon msa/;
$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;

add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon msa/;
$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;

add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/;
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;

add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize qw/vp8_bilinear_predict4x4 mmx media msa/;
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;

#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {

#
# Block copy
#
# Declared only when the configured architecture string matches /x86/.
if ($opts{arch} =~ /x86/) {
    add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
    specialize qw/vp8_copy32xn sse2 sse3/;
}

#
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct4x4 mmx sse2 media neon msa/;
$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;

add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct8x4 mmx sse2 media neon msa/;
$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;

add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_walsh4x4 sse2 media neon msa/;
$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;

#
# Quantizer
#
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;

add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;

#
# Block subtraction
#
add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
specialize qw/vp8_block_error mmx sse2 msa/;
$vp8_block_error_sse2=vp8_block_error_xmm;

add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
specialize qw/vp8_mbblock_error mmx sse2 msa/;
$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;

add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
specialize qw/vp8_mbuverror mmx sse2 msa/;
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;

#
# Motion search
#
add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_full_search_sad sse3 sse4_1/;
$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;

add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_refining_search_sad sse3/;
$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;

add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
# NOTE(review): unlike the two searches above, no specialize line lists sse3
# for this function, so the assignment below may have no effect -- confirm
# whether the sse3 variant is meant to be enabled.
$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;

#
# Alt-ref Noise Reduction (ARNR)
#
# Skipped when CONFIG_REALTIME_ONLY is "yes".
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
    specialize qw/vp8_temporal_filter_apply sse2 msa/;
}

#
# Denoiser filter
#
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter sse2 neon msa/;
    add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter_uv sse2 neon msa/;
}

# End of encoder only functions
}
1;