1sub vp9_common_forward_decls() {
2print <<EOF
3/*
4 * VP9
5 */
6
7#include "vpx/vpx_integer.h"
8#include "vp9/common/vp9_enums.h"
9
10struct macroblockd;
11
12/* Encoder forward decls */
13struct macroblock;
14struct vp9_variance_vtable;
15
16#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
17struct mv;
18union int_mv;
19struct yv12_buffer_config;
20EOF
21}
22forward_decls qw/vp9_common_forward_decls/;
23
24# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
25if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
26  $mmx_x86inc = 'mmx';
27  $sse_x86inc = 'sse';
28  $sse2_x86inc = 'sse2';
29  $ssse3_x86inc = 'ssse3';
30  $avx_x86inc = 'avx';
31  $avx2_x86inc = 'avx2';
32} else {
33  $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
34  $avx_x86inc = $avx2_x86inc = '';
35}
36
37# this variable is for functions that are 64 bit only.
38if ($opts{arch} eq "x86_64") {
39  $mmx_x86_64 = 'mmx';
40  $sse2_x86_64 = 'sse2';
41  $ssse3_x86_64 = 'ssse3';
42  $avx_x86_64 = 'avx';
43  $avx2_x86_64 = 'avx2';
44} else {
45  $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
46  $avx_x86_64 = $avx2_x86_64 = '';
47}
48
49#
50# RECON
51#
52add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
53specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";
54
55add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
56specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";
57
58add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
59specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
60
61add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
62specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
63
64add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
65specialize qw/vp9_d117_predictor_4x4/;
66
67add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
68specialize qw/vp9_d135_predictor_4x4/;
69
70add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
71specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
72
73add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
74specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
75
76add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
77specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
78
79add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
80specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
81
82add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
83specialize qw/vp9_dc_top_predictor_4x4/;
84
85add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
86specialize qw/vp9_dc_left_predictor_4x4/;
87
88add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
89specialize qw/vp9_dc_128_predictor_4x4/;
90
91add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
92specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
93
94add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
95specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";
96
97add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
98specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
99
100add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
101specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
102
103add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
104specialize qw/vp9_d117_predictor_8x8/;
105
106add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
107specialize qw/vp9_d135_predictor_8x8/;
108
109add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
110specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
111
112add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
113specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
114
115add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
116specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
117
118add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
119specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
120
121add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
122specialize qw/vp9_dc_top_predictor_8x8/;
123
124add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
125specialize qw/vp9_dc_left_predictor_8x8/;
126
127add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
128specialize qw/vp9_dc_128_predictor_8x8/;
129
130add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
131specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
132
133add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
134specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
135
136add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
137specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
138
139add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
140specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
141
142add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
143specialize qw/vp9_d117_predictor_16x16/;
144
145add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
146specialize qw/vp9_d135_predictor_16x16/;
147
148add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
149specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
150
151add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
152specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
153
154add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
155specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
156
157add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
158specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
159
160add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
161specialize qw/vp9_dc_top_predictor_16x16/;
162
163add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
164specialize qw/vp9_dc_left_predictor_16x16/;
165
166add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
167specialize qw/vp9_dc_128_predictor_16x16/;
168
169add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
170specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
171
172add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
173specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";
174
175add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
176specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
177
178add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
179specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
180
181add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
182specialize qw/vp9_d117_predictor_32x32/;
183
184add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
185specialize qw/vp9_d135_predictor_32x32/;
186
187add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
188specialize qw/vp9_d153_predictor_32x32/;
189
190add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
191specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
192
193add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
194specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
195
196add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
197specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
198
199add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
200specialize qw/vp9_dc_top_predictor_32x32/;
201
202add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
203specialize qw/vp9_dc_left_predictor_32x32/;
204
205add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
206specialize qw/vp9_dc_128_predictor_32x32/;
207
208#
209# Loopfilter
210#
211add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
212specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
213
214add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
215specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
216
217add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
218specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
219
220add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
221specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
222
223add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
224specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
225
226add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
227specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
228
229add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
230specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
231
232add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
233specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
234
235add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
236specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
237
238add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
239specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
240
241add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
242specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
243
244#
245# post proc
246#
247if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
248add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
249specialize qw/vp9_mbpost_proc_down mmx sse2/;
250$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm;
251
252add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
253specialize qw/vp9_mbpost_proc_across_ip sse2/;
254$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm;
255
256add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
257specialize qw/vp9_post_proc_down_and_across mmx sse2/;
258$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
259
260add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
261specialize qw/vp9_plane_add_noise mmx sse2/;
262$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
263}
264
265add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
266specialize qw/vp9_blend_mb_inner/;
267
268add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
269specialize qw/vp9_blend_mb_outer/;
270
271add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
272specialize qw/vp9_blend_b/;
273
274#
275# Sub Pixel Filters
276#
277add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
278specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
279
280add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
281specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
282
283add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
284specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
285
286add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
287specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
288
289add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
290specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
291
292add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
293specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
294
295add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
296specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
297
298add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
299specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
300
301#
302# dct
303#
304add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
305specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
306
307add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
308specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
309
310add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
311specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
312
313add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
314specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;
315
316add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
317specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
318
319add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
320specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
321
322add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
323specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
324
325add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
326specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
327
328add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
329specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
330
331add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
332specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
333$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
334
335add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
336specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
337
338add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
339specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
340
341add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
342specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
343
344add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
345specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
346
347# dct and add
348
349add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
350specialize qw/vp9_iwht4x4_1_add/;
351
352add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
353specialize qw/vp9_iwht4x4_16_add/;
354
355#
356# Encoder functions below this point.
357#
358if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
359
360
361# variance
362add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
363specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";
364
365add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
366specialize qw/vp9_variance16x32/, "$sse2_x86inc";
367
368add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
369specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";
370
371add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
372specialize qw/vp9_variance32x64/, "$sse2_x86inc";
373
374add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
375specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";
376
377add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
378specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
379
380add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
381specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
382
383add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
384specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
385
386add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
387specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
388
389add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
390specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
391
392add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
393specialize qw/vp9_get_sse_sum_8x8 sse2/;
394$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
395
396add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
397specialize qw/vp9_variance8x4/, "$sse2_x86inc";
398
399add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
400specialize qw/vp9_variance4x8/, "$sse2_x86inc";
401
402add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
403specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
404
405add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
406specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
407
408add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
409specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
410
411add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
412specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
413
414add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
415specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
416
417add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
418specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
419
420add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
421specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
422
423add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
424specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
425
426add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
427specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
428
429add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
430specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
431
432add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
433specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
434
435add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
436specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
437
438add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
439specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
440
441add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
442specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
443
444add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
445specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
446
447add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
448specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
449
450add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
451specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
452
453add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
454specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
455
456add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
457specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
458
459add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
460specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
461
462add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
463specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
464
465# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
466add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
467specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
468
469add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
470specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
471
472add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
473specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
474
475add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
476specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
477
478add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
479specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
480#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
481
482add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
483specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
484
485add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
486specialize qw/vp9_sad64x64/, "$sse2_x86inc";
487
488add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
489specialize qw/vp9_sad32x64/, "$sse2_x86inc";
490
491add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
492specialize qw/vp9_sad64x32/, "$sse2_x86inc";
493
494add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
495specialize qw/vp9_sad32x16/, "$sse2_x86inc";
496
497add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
498specialize qw/vp9_sad16x32/, "$sse2_x86inc";
499
500add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
501specialize qw/vp9_sad32x32/, "$sse2_x86inc";
502
503add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
504specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";
505
506add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
507specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";
508
509add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
510specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";
511
512add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
513specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";
514
515add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
516specialize qw/vp9_sad8x4/, "$sse2_x86inc";
517
518add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
519specialize qw/vp9_sad4x8/, "$sse_x86inc";
520
521add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
522specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";
523
524add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
525specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
526
527add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
528specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
529
530add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
531specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
532
533add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
534specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
535
536add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
537specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
538
539add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
540specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
541
542add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
543specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
544
545add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
546specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
547
548add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
549specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
550
551add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
552specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
553
554add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
555specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
556
557add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
558specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
559
560add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
561specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
562
563add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
564specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
565
566add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
567specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
568
569add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
570specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
571
572add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
573specialize qw/vp9_variance_halfpixvar64x64_h/;
574
575add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
576specialize qw/vp9_variance_halfpixvar64x64_v/;
577
578add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
579specialize qw/vp9_variance_halfpixvar64x64_hv/;
580
581add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
582specialize qw/vp9_variance_halfpixvar32x32_h/;
583
584add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
585specialize qw/vp9_variance_halfpixvar32x32_v/;
586
587add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
588specialize qw/vp9_variance_halfpixvar32x32_hv/;
589
590add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
591specialize qw/vp9_sad64x64x3/;
592
593add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
594specialize qw/vp9_sad32x32x3/;
595
596add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
597specialize qw/vp9_sad16x16x3 sse3 ssse3/;
598
599add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
600specialize qw/vp9_sad16x8x3 sse3 ssse3/;
601
602add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
603specialize qw/vp9_sad8x16x3 sse3/;
604
605add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
606specialize qw/vp9_sad8x8x3 sse3/;
607
608add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
609specialize qw/vp9_sad4x4x3 sse3/;
610
611add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
612specialize qw/vp9_sad64x64x8/;
613
614add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
615specialize qw/vp9_sad32x32x8/;
616
617add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
618specialize qw/vp9_sad16x16x8 sse4/;
619
620add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
621specialize qw/vp9_sad16x8x8 sse4/;
622
623add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
624specialize qw/vp9_sad8x16x8 sse4/;
625
626add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
627specialize qw/vp9_sad8x8x8 sse4/;
628
629add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
630specialize qw/vp9_sad8x4x8/;
631
632add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
633specialize qw/vp9_sad4x8x8/;
634
635add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
636specialize qw/vp9_sad4x4x8 sse4/;
637
638add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
639specialize qw/vp9_sad64x64x4d sse2 avx2/;
640
641add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
642specialize qw/vp9_sad32x64x4d sse2/;
643
644add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
645specialize qw/vp9_sad64x32x4d sse2/;
646
647add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
648specialize qw/vp9_sad32x16x4d sse2/;
649
650add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
651specialize qw/vp9_sad16x32x4d sse2/;
652
653add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
654specialize qw/vp9_sad32x32x4d sse2 avx2/;
655
656add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
657specialize qw/vp9_sad16x16x4d sse2/;
658
659add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
660specialize qw/vp9_sad16x8x4d sse2/;
661
662add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
663specialize qw/vp9_sad8x16x4d sse2/;
664
665add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
666specialize qw/vp9_sad8x8x4d sse2/;
667
668# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
669add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
670specialize qw/vp9_sad8x4x4d sse2/;
671
672add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
673specialize qw/vp9_sad4x8x4d sse/;
674
675add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
676specialize qw/vp9_sad4x4x4d sse/;
677
678#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
679#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;
680
681add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
682specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
683
684add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
685specialize qw/vp9_mse8x16/;
686
687add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
688specialize qw/vp9_mse16x8/;
689
690add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
691specialize qw/vp9_mse8x8/;
692
693add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
694specialize qw/vp9_sub_pixel_mse64x64/;
695
696add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
697specialize qw/vp9_sub_pixel_mse32x32/;
698
699add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
700specialize qw/vp9_get_mb_ss mmx sse2/;
701# ENCODEMB INVOKE
702
703add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
704specialize qw/vp9_block_error/, "$sse2_x86inc";
705
706add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
707specialize qw/vp9_subtract_block/, "$sse2_x86inc";
708
709add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
710specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
711
712add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
713specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
714
715#
716# Structured Similarity (SSIM)
717#
718if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
719    add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
720    specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";
721
722    add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
723    specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
724}
725
726# fdct functions
727add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type";
728specialize qw/vp9_fht4x4 sse2 avx2/;
729
730add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type";
731specialize qw/vp9_fht8x8 sse2 avx2/;
732
733add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
734specialize qw/vp9_fht16x16 sse2 avx2/;
735
736add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
737specialize qw/vp9_fwht4x4/;
738
739add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
740specialize qw/vp9_fdct4x4 sse2 avx2/;
741
742add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
743specialize qw/vp9_fdct8x8 sse2 avx2/;
744
745add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
746specialize qw/vp9_fdct16x16 sse2 avx2/;
747
748add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
749specialize qw/vp9_fdct32x32 sse2 avx2/;
750
751add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride";
752specialize qw/vp9_fdct32x32_rd sse2 avx2/;
753
754#
755# Motion search
756#
757add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
758specialize qw/vp9_full_search_sad sse3 sse4_1/;
759$vp9_full_search_sad_sse3=vp9_full_search_sadx3;
760$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
761
762add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
763specialize qw/vp9_refining_search_sad sse3/;
764$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
765
766add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
767specialize qw/vp9_diamond_search_sad sse3/;
768$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
769
770add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
771specialize qw/vp9_full_range_search/;
772
773add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
774specialize qw/vp9_temporal_filter_apply sse2/;
775
776}
777# end encoder functions
7781;
779