1sub vp9_common_forward_decls() { 2print <<EOF 3/* 4 * VP9 5 */ 6 7#include "vpx/vpx_integer.h" 8#include "vp9/common/vp9_enums.h" 9 10struct macroblockd; 11 12/* Encoder forward decls */ 13struct macroblock; 14struct vp9_variance_vtable; 15 16#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] 17struct mv; 18union int_mv; 19struct yv12_buffer_config; 20EOF 21} 22forward_decls qw/vp9_common_forward_decls/; 23 24# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. 25if (vpx_config("CONFIG_USE_X86INC") eq "yes") { 26 $mmx_x86inc = 'mmx'; 27 $sse_x86inc = 'sse'; 28 $sse2_x86inc = 'sse2'; 29 $ssse3_x86inc = 'ssse3'; 30 $avx_x86inc = 'avx'; 31 $avx2_x86inc = 'avx2'; 32} else { 33 $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = 34 $avx_x86inc = $avx2_x86inc = ''; 35} 36 37# this variable is for functions that are 64 bit only. 38if ($opts{arch} eq "x86_64") { 39 $mmx_x86_64 = 'mmx'; 40 $sse2_x86_64 = 'sse2'; 41 $ssse3_x86_64 = 'ssse3'; 42 $avx_x86_64 = 'avx'; 43 $avx2_x86_64 = 'avx2'; 44} else { 45 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = 46 $avx_x86_64 = $avx2_x86_64 = ''; 47} 48 49# 50# RECON 51# 52add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 53specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; 54 55add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 56specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc"; 57 58add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 59specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; 60 61add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 62specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc"; 63 64add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 65specialize qw/vp9_d117_predictor_4x4/; 66 67add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 68specialize qw/vp9_d135_predictor_4x4/; 69 70add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 71specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; 72 73add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 74specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc"; 75 76add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 77specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc"; 78 79add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 80specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc"; 81 82add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 83specialize qw/vp9_dc_top_predictor_4x4/; 84 85add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 86specialize qw/vp9_dc_left_predictor_4x4/; 87 88add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 89specialize qw/vp9_dc_128_predictor_4x4/; 90 91add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 92specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; 93 94add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 95specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc"; 96 97add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 98specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; 99 100add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 101specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc"; 102 103add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 104specialize qw/vp9_d117_predictor_8x8/; 105 106add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 107specialize qw/vp9_d135_predictor_8x8/; 108 109add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 110specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; 111 112add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 113specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc"; 114 115add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 116specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc"; 117 118add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 119specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc"; 120 121add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 122specialize qw/vp9_dc_top_predictor_8x8/; 123 124add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 125specialize qw/vp9_dc_left_predictor_8x8/; 126 127add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 128specialize qw/vp9_dc_128_predictor_8x8/; 129 130add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 131specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; 132 133add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 134specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc"; 135 136add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 137specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; 138 139add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 140specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc"; 141 142add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 143specialize qw/vp9_d117_predictor_16x16/; 144 145add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 146specialize qw/vp9_d135_predictor_16x16/; 147 148add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 149specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; 150 151add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 152specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc"; 153 154add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 155specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc"; 156 157add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 158specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc"; 159 160add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 161specialize qw/vp9_dc_top_predictor_16x16/; 162 163add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 164specialize qw/vp9_dc_left_predictor_16x16/; 165 166add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 167specialize qw/vp9_dc_128_predictor_16x16/; 168 169add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 170specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; 171 172add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 173specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc"; 174 175add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 176specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; 177 178add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 179specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc"; 180 181add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 182specialize qw/vp9_d117_predictor_32x32/; 183 184add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 185specialize qw/vp9_d135_predictor_32x32/; 186 187add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 188specialize qw/vp9_d153_predictor_32x32/; 189 190add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 191specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc"; 192 193add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 194specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64"; 195 196add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 197specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc"; 198 199add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 200specialize qw/vp9_dc_top_predictor_32x32/; 201 202add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 203specialize qw/vp9_dc_left_predictor_32x32/; 204 205add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 206specialize qw/vp9_dc_128_predictor_32x32/; 207 208# 209# Loopfilter 210# 211add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 212specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/; 213 214add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 215specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/; 216 217add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; 218specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/; 219 220add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 221specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/; 222 223add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; 224specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/; 225 226add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 227specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/; 228 229add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; 230specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/; 231 232add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; 233specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/; 234 235add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 236specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/; 237 238add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; 239specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/; 240 241add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 242specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/; 243 244# 245# post proc 246# 247if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { 248add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit"; 249specialize qw/vp9_mbpost_proc_down mmx sse2/; 250$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm; 251 252add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit"; 253specialize qw/vp9_mbpost_proc_across_ip sse2/; 254$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm; 255 256add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; 257specialize qw/vp9_post_proc_down_and_across mmx sse2/; 258$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm; 259 260add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; 261specialize qw/vp9_plane_add_noise mmx sse2/; 262$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; 263} 264 265add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; 266specialize qw/vp9_blend_mb_inner/; 267 268add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; 269specialize qw/vp9_blend_mb_outer/; 270 271add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; 272specialize qw/vp9_blend_b/; 273 274# 275# Sub Pixel Filters 276# 277add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 278specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc"; 279 280add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 281specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc"; 282 283add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 284specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/; 285 286add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 287specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/; 288 289add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 290specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/; 291 292add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 293specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/; 294 295add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 296specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/; 297 298add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 299specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/; 300 301# 302# dct 303# 304add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 305specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/; 306 307add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 308specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/; 309 310add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 311specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/; 312 313add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 314specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/; 315 316add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 317specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/; 318 319add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 320specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/; 321 322add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 323specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/; 324 325add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 326specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/; 327 328add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 329specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/; 330 331add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 332specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/; 333$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon; 334 335add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 336specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/; 337 338add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; 339specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/; 340 341add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; 342specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/; 343 344add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type"; 345specialize qw/vp9_iht16x16_256_add sse2 dspr2/; 346 347# dct and add 348 349add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 350specialize qw/vp9_iwht4x4_1_add/; 351 352add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; 353specialize qw/vp9_iwht4x4_16_add/; 354 355# 356# Encoder functions below this point. 357# 358if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { 359 360 361# variance 362add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 363specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc"; 364 365add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 366specialize qw/vp9_variance16x32/, "$sse2_x86inc"; 367 368add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 369specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc"; 370 371add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 372specialize qw/vp9_variance32x64/, "$sse2_x86inc"; 373 374add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 375specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc"; 376 377add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 378specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; 379 380add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 381specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; 382 383add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 384specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; 385 386add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 387specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; 388 389add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 390specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; 391 392add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; 393specialize qw/vp9_get_sse_sum_8x8 sse2/; 394$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2; 395 396add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 397specialize qw/vp9_variance8x4/, "$sse2_x86inc"; 398 399add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 400specialize qw/vp9_variance4x8/, "$sse2_x86inc"; 401 402add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 403specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; 404 405add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 406specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 407 408add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 409specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 410 411add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 412specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; 413 414add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 415specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; 416 417add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 418specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; 419 420add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 421specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; 422 423add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 424specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; 425 426add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 427specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; 428 429add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 430specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; 431 432add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 433specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; 434 435add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 436specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 437 438add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 439specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 440 441add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 442specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; 443 444add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 445specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; 446 447add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 448specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; 449 450add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 451specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; 452 453add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 454specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; 455 456add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 457specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; 458 459add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 460specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; 461 462add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 463specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; 464 465# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form 466add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 467specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; 468 469add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 470specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; 471 472add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 473specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; 474 475add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 476specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; 477 478add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 479specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; 480#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt 481 482add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 483specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; 484 485add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 486specialize qw/vp9_sad64x64/, "$sse2_x86inc"; 487 488add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 489specialize qw/vp9_sad32x64/, "$sse2_x86inc"; 490 491add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 492specialize qw/vp9_sad64x32/, "$sse2_x86inc"; 493 494add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 495specialize qw/vp9_sad32x16/, "$sse2_x86inc"; 496 497add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 498specialize qw/vp9_sad16x32/, "$sse2_x86inc"; 499 500add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 501specialize qw/vp9_sad32x32/, "$sse2_x86inc"; 502 503add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 504specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; 505 506add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 507specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; 508 509add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 510specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; 511 512add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 513specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; 514 515add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 516specialize qw/vp9_sad8x4/, "$sse2_x86inc"; 517 518add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 519specialize qw/vp9_sad4x8/, "$sse_x86inc"; 520 521add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 522specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; 523 524add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 525specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; 526 527add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 528specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; 529 530add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 531specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; 532 533add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 534specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; 535 536add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 537specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; 538 539add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 540specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; 541 542add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 543specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; 544 545add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 546specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc"; 547 548add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 549specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc"; 550 551add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 552specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc"; 553 554add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 555specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc"; 556 557add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 558specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; 559 560add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 561specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; 562 563add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 564specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc"; 565 566add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 567specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc"; 568 569add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 570specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc"; 571 572add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 573specialize qw/vp9_variance_halfpixvar64x64_h/; 574 575add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 576specialize qw/vp9_variance_halfpixvar64x64_v/; 577 578add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 579specialize qw/vp9_variance_halfpixvar64x64_hv/; 580 581add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 582specialize qw/vp9_variance_halfpixvar32x32_h/; 583 584add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 585specialize qw/vp9_variance_halfpixvar32x32_v/; 586 587add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 588specialize qw/vp9_variance_halfpixvar32x32_hv/; 589 590add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 591specialize qw/vp9_sad64x64x3/; 592 593add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 594specialize qw/vp9_sad32x32x3/; 595 596add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 597specialize qw/vp9_sad16x16x3 sse3 ssse3/; 598 599add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 600specialize qw/vp9_sad16x8x3 sse3 ssse3/; 601 602add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 603specialize qw/vp9_sad8x16x3 sse3/; 604 605add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 606specialize qw/vp9_sad8x8x3 sse3/; 607 608add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; 609specialize qw/vp9_sad4x4x3 sse3/; 610 611add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 612specialize qw/vp9_sad64x64x8/; 613 614add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 615specialize qw/vp9_sad32x32x8/; 616 617add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 618specialize qw/vp9_sad16x16x8 sse4/; 619 620add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 621specialize qw/vp9_sad16x8x8 sse4/; 622 623add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 624specialize qw/vp9_sad8x16x8 sse4/; 625 626add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 627specialize qw/vp9_sad8x8x8 sse4/; 628 629add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 630specialize qw/vp9_sad8x4x8/; 631 632add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 633specialize qw/vp9_sad4x8x8/; 634 635add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; 636specialize qw/vp9_sad4x4x8 sse4/; 637 638add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 639specialize qw/vp9_sad64x64x4d sse2 avx2/; 640 641add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 642specialize qw/vp9_sad32x64x4d sse2/; 643 644add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 645specialize qw/vp9_sad64x32x4d sse2/; 646 647add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 648specialize qw/vp9_sad32x16x4d sse2/; 649 650add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 651specialize qw/vp9_sad16x32x4d sse2/; 652 653add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 654specialize qw/vp9_sad32x32x4d sse2 avx2/; 655 656add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 657specialize qw/vp9_sad16x16x4d sse2/; 658 659add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 660specialize qw/vp9_sad16x8x4d sse2/; 661 662add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 663specialize qw/vp9_sad8x16x4d sse2/; 664 665add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 666specialize qw/vp9_sad8x8x4d sse2/; 667 668# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form 669add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 670specialize qw/vp9_sad8x4x4d sse2/; 671 672add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 673specialize qw/vp9_sad4x8x4d sse/; 674 675add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 676specialize qw/vp9_sad4x4x4d sse/; 677 678#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"; 679#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/; 680 681add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 682specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; 683 684add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 685specialize qw/vp9_mse8x16/; 686 687add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 688specialize qw/vp9_mse16x8/; 689 690add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 691specialize qw/vp9_mse8x8/; 692 693add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 694specialize qw/vp9_sub_pixel_mse64x64/; 695 696add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 697specialize qw/vp9_sub_pixel_mse32x32/; 698 699add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; 700specialize qw/vp9_get_mb_ss mmx sse2/; 701# ENCODEMB INVOKE 702 703add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"; 704specialize qw/vp9_block_error/, "$sse2_x86inc"; 705 706add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 707specialize qw/vp9_subtract_block/, "$sse2_x86inc"; 708 709add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 710specialize qw/vp9_quantize_b/, "$ssse3_x86_64"; 711 712add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 713specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; 714 715# 716# Structured Similarity (SSIM) 717# 718if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { 719 add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; 720 specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64"; 721 722 add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; 723 specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64"; 724} 725 726# fdct functions 727add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type"; 728specialize qw/vp9_fht4x4 sse2 avx2/; 729 730add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type"; 731specialize qw/vp9_fht8x8 sse2 avx2/; 732 733add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; 734specialize qw/vp9_fht16x16 sse2 avx2/; 735 736add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; 737specialize qw/vp9_fwht4x4/; 738 739add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; 740specialize qw/vp9_fdct4x4 sse2 avx2/; 741 742add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride"; 743specialize qw/vp9_fdct8x8 sse2 avx2/; 744 745add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; 746specialize qw/vp9_fdct16x16 sse2 avx2/; 747 748add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride"; 749specialize qw/vp9_fdct32x32 sse2 avx2/; 750 751add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride"; 752specialize qw/vp9_fdct32x32_rd sse2 avx2/; 753 754# 755# Motion search 756# 757add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"; 758specialize qw/vp9_full_search_sad sse3 sse4_1/; 759$vp9_full_search_sad_sse3=vp9_full_search_sadx3; 760$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; 761 762add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; 763specialize qw/vp9_refining_search_sad sse3/; 764$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4; 765 766add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; 767specialize qw/vp9_diamond_search_sad sse3/; 768$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4; 769 770add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; 771specialize qw/vp9_full_range_search/; 772 773add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; 774specialize qw/vp9_temporal_filter_apply sse2/; 775 776} 777# end encoder functions 7781; 779