1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include <stdlib.h>
11 
12 #include "./vp9_rtcd.h"
13 #include "vp9/common/vp9_common.h"
14 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
15 
16 #if HAVE_DSPR2
vp9_h_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)17 void vp9_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
18                                const uint8_t *above, const uint8_t *left) {
19   int32_t  tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
20 
21   __asm__ __volatile__ (
22       "lb         %[tmp1],      (%[left])                   \n\t"
23       "lb         %[tmp2],      1(%[left])                  \n\t"
24       "lb         %[tmp3],      2(%[left])                  \n\t"
25       "lb         %[tmp4],      3(%[left])                  \n\t"
26       "lb         %[tmp5],      4(%[left])                  \n\t"
27       "lb         %[tmp6],      5(%[left])                  \n\t"
28       "lb         %[tmp7],      6(%[left])                  \n\t"
29       "lb         %[tmp8],      7(%[left])                  \n\t"
30 
31       "replv.qb   %[tmp1],      %[tmp1]                     \n\t"
32       "replv.qb   %[tmp2],      %[tmp2]                     \n\t"
33       "replv.qb   %[tmp3],      %[tmp3]                     \n\t"
34       "replv.qb   %[tmp4],      %[tmp4]                     \n\t"
35       "replv.qb   %[tmp5],      %[tmp5]                     \n\t"
36       "replv.qb   %[tmp6],      %[tmp6]                     \n\t"
37       "replv.qb   %[tmp7],      %[tmp7]                     \n\t"
38       "replv.qb   %[tmp8],      %[tmp8]                     \n\t"
39 
40       "sw         %[tmp1],      (%[dst])                    \n\t"
41       "sw         %[tmp1],      4(%[dst])                   \n\t"
42       "add        %[dst],       %[dst],         %[stride]   \n\t"
43       "sw         %[tmp2],      (%[dst])                    \n\t"
44       "sw         %[tmp2],      4(%[dst])                   \n\t"
45       "add        %[dst],       %[dst],         %[stride]   \n\t"
46       "sw         %[tmp3],      (%[dst])                    \n\t"
47       "sw         %[tmp3],      4(%[dst])                   \n\t"
48       "add        %[dst],       %[dst],         %[stride]   \n\t"
49       "sw         %[tmp4],      (%[dst])                    \n\t"
50       "sw         %[tmp4],      4(%[dst])                   \n\t"
51       "add        %[dst],       %[dst],         %[stride]   \n\t"
52       "sw         %[tmp5],      (%[dst])                    \n\t"
53       "sw         %[tmp5],      4(%[dst])                   \n\t"
54       "add        %[dst],       %[dst],         %[stride]   \n\t"
55       "sw         %[tmp6],      (%[dst])                    \n\t"
56       "sw         %[tmp6],      4(%[dst])                   \n\t"
57       "add        %[dst],       %[dst],         %[stride]   \n\t"
58       "sw         %[tmp7],      (%[dst])                    \n\t"
59       "sw         %[tmp7],      4(%[dst])                   \n\t"
60       "add        %[dst],       %[dst],         %[stride]   \n\t"
61       "sw         %[tmp8],      (%[dst])                    \n\t"
62       "sw         %[tmp8],      4(%[dst])                   \n\t"
63 
64       : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
65         [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
66         [tmp5] "=&r" (tmp5), [tmp7] "=&r" (tmp7),
67         [tmp6] "=&r" (tmp6), [tmp8] "=&r" (tmp8)
68       : [left] "r" (left), [dst] "r" (dst),
69         [stride] "r" (stride)
70   );
71 }
72 
vp9_dc_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)73 void vp9_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
74                                 const uint8_t *above, const uint8_t *left) {
75   int32_t  expected_dc;
76   int32_t  average;
77   int32_t  tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
78   int32_t  above2, above_l2, above_r2, left2, left_r2, left_l2;
79 
80   __asm__ __volatile__ (
81       "lw              %[above1],         (%[above])                      \n\t"
82       "lw              %[above2],         4(%[above])                     \n\t"
83       "lw              %[left1],          (%[left])                       \n\t"
84       "lw              %[left2],          4(%[left])                      \n\t"
85 
86       "preceu.ph.qbl   %[above_l1],       %[above1]                       \n\t"
87       "preceu.ph.qbr   %[above_r1],       %[above1]                       \n\t"
88       "preceu.ph.qbl   %[left_l1],        %[left1]                        \n\t"
89       "preceu.ph.qbr   %[left_r1],        %[left1]                        \n\t"
90 
91       "preceu.ph.qbl   %[above_l2],       %[above2]                       \n\t"
92       "preceu.ph.qbr   %[above_r2],       %[above2]                       \n\t"
93       "preceu.ph.qbl   %[left_l2],        %[left2]                        \n\t"
94       "preceu.ph.qbr   %[left_r2],        %[left2]                        \n\t"
95 
96       "addu.ph         %[average],        %[above_r1],      %[above_l1]   \n\t"
97       "addu.ph         %[average],        %[average],       %[left_l1]    \n\t"
98       "addu.ph         %[average],        %[average],       %[left_r1]    \n\t"
99 
100       "addu.ph         %[average],        %[average],       %[above_l2]   \n\t"
101       "addu.ph         %[average],        %[average],       %[above_r2]   \n\t"
102       "addu.ph         %[average],        %[average],       %[left_l2]    \n\t"
103       "addu.ph         %[average],        %[average],       %[left_r2]    \n\t"
104 
105       "addiu           %[average],        %[average],       8             \n\t"
106 
107       "srl             %[tmp],            %[average],       16            \n\t"
108       "addu.ph         %[average],        %[tmp],           %[average]    \n\t"
109       "srl             %[expected_dc],    %[average],       4             \n\t"
110       "replv.qb        %[expected_dc],    %[expected_dc]                  \n\t"
111 
112       "sw              %[expected_dc],    (%[dst])                        \n\t"
113       "sw              %[expected_dc],    4(%[dst])                       \n\t"
114 
115       "add             %[dst],             %[dst],          %[stride]     \n\t"
116       "sw              %[expected_dc],    (%[dst])                        \n\t"
117       "sw              %[expected_dc],    4(%[dst])                       \n\t"
118 
119       "add             %[dst],             %[dst],          %[stride]     \n\t"
120       "sw              %[expected_dc],    (%[dst])                        \n\t"
121       "sw              %[expected_dc],    4(%[dst])                       \n\t"
122 
123       "add             %[dst],             %[dst],          %[stride]     \n\t"
124       "sw              %[expected_dc],    (%[dst])                        \n\t"
125       "sw              %[expected_dc],    4(%[dst])                       \n\t"
126 
127       "add             %[dst],             %[dst],          %[stride]     \n\t"
128       "sw              %[expected_dc],    (%[dst])                        \n\t"
129       "sw              %[expected_dc],    4(%[dst])                       \n\t"
130 
131       "add             %[dst],             %[dst],          %[stride]     \n\t"
132       "sw              %[expected_dc],    (%[dst])                        \n\t"
133       "sw              %[expected_dc],    4(%[dst])                       \n\t"
134 
135       "add             %[dst],             %[dst],          %[stride]     \n\t"
136       "sw              %[expected_dc],    (%[dst])                        \n\t"
137       "sw              %[expected_dc],    4(%[dst])                       \n\t"
138 
139       "add             %[dst],             %[dst],          %[stride]     \n\t"
140       "sw              %[expected_dc],    (%[dst])                        \n\t"
141       "sw              %[expected_dc],    4(%[dst])                       \n\t"
142 
143       : [above1] "=&r" (above1), [above_l1] "=&r" (above_l1),
144         [above_r1] "=&r" (above_r1), [left1] "=&r" (left1),
145         [left_l1] "=&r" (left_l1), [left_r1] "=&r" (left_r1),
146         [above2] "=&r" (above2), [above_l2] "=&r" (above_l2),
147         [above_r2] "=&r" (above_r2), [left2] "=&r" (left2),
148         [left_l2] "=&r" (left_l2), [left_r2] "=&r" (left_r2),
149         [average] "=&r" (average), [tmp] "=&r" (tmp),
150         [expected_dc] "=&r" (expected_dc)
151       : [above] "r" (above), [left] "r" (left), [dst] "r" (dst),
152         [stride] "r" (stride)
153   );
154 }
155 
vp9_tm_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)156 void vp9_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
157                                 const uint8_t *above, const uint8_t *left) {
158   int32_t   abovel, abover;
159   int32_t   abovel_1, abover_1;
160   int32_t   left0;
161   int32_t   res0, res1, res2, res3;
162   int32_t   reshw;
163   int32_t   top_left;
164   uint8_t   *cm = vp9_ff_cropTbl;
165 
166   __asm__ __volatile__ (
167       "ulw             %[reshw],       (%[above])                         \n\t"
168       "ulw             %[top_left],    4(%[above])                        \n\t"
169 
170       "lbu             %[left0],       (%[left])                          \n\t"
171 
172       "preceu.ph.qbl   %[abovel],      %[reshw]                           \n\t"
173       "preceu.ph.qbr   %[abover],      %[reshw]                           \n\t"
174       "preceu.ph.qbl   %[abovel_1],    %[top_left]                        \n\t"
175       "preceu.ph.qbr   %[abover_1],    %[top_left]                        \n\t"
176 
177       "lbu             %[top_left],    -1(%[above])                       \n\t"
178       "replv.ph        %[left0],       %[left0]                           \n\t"
179 
180       "replv.ph        %[top_left],    %[top_left]                        \n\t"
181 
182       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
183       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
184 
185       "sll             %[res2],        %[reshw],            16            \n\t"
186       "sra             %[res2],        %[res2],             16            \n\t"
187       "sra             %[res3],        %[reshw],            16            \n\t"
188 
189       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
190       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
191 
192       "sll             %[res0],        %[reshw],            16            \n\t"
193       "sra             %[res0],        %[res0],             16            \n\t"
194       "sra             %[res1],        %[reshw],            16            \n\t"
195 
196       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
197       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
198       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
199       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
200 
201       "sb              %[res0],        (%[dst])                           \n\t"
202       "sb              %[res1],        1(%[dst])                          \n\t"
203       "sb              %[res2],        2(%[dst])                          \n\t"
204       "sb              %[res3],        3(%[dst])                          \n\t"
205 
206       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
207       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
208 
209       "sll             %[res2],        %[reshw],            16            \n\t"
210       "sra             %[res2],        %[res2],             16            \n\t"
211       "sra             %[res3],        %[reshw],            16            \n\t"
212 
213       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
214       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
215 
216       "sll             %[res0],        %[reshw],            16            \n\t"
217       "sra             %[res0],        %[res0],             16            \n\t"
218       "sra             %[res1],        %[reshw],            16            \n\t"
219 
220       "lbu             %[left0],       1(%[left])                         \n\t"
221 
222       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
223       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
224       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
225       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
226 
227       "sb              %[res0],        4(%[dst])                          \n\t"
228       "sb              %[res1],        5(%[dst])                          \n\t"
229       "sb              %[res2],        6(%[dst])                          \n\t"
230       "sb              %[res3],        7(%[dst])                          \n\t"
231 
232       "replv.ph        %[left0],       %[left0]                           \n\t"
233       "add             %[dst],          %[dst],             %[stride]     \n\t"
234 
235       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
236       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
237 
238       "sll             %[res2],        %[reshw],            16            \n\t"
239       "sra             %[res2],        %[res2],             16            \n\t"
240       "sra             %[res3],        %[reshw],            16            \n\t"
241 
242       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
243       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
244 
245       "sll             %[res0],        %[reshw],            16            \n\t"
246       "sra             %[res0],        %[res0],             16            \n\t"
247       "sra             %[res1],        %[reshw],            16            \n\t"
248 
249       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
250       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
251       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
252       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
253 
254       "sb              %[res0],        (%[dst])                           \n\t"
255       "sb              %[res1],        1(%[dst])                          \n\t"
256       "sb              %[res2],        2(%[dst])                          \n\t"
257       "sb              %[res3],        3(%[dst])                          \n\t"
258 
259       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
260       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
261 
262       "sll             %[res2],        %[reshw],            16            \n\t"
263       "sra             %[res2],        %[res2],             16            \n\t"
264       "sra             %[res3],        %[reshw],            16            \n\t"
265 
266       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
267       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
268 
269       "sll             %[res0],        %[reshw],            16            \n\t"
270       "sra             %[res0],        %[res0],             16            \n\t"
271       "sra             %[res1],        %[reshw],            16            \n\t"
272 
273       "lbu             %[left0],       2(%[left])                         \n\t"
274 
275       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
276       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
277       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
278       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
279 
280       "sb              %[res0],        4(%[dst])                          \n\t"
281       "sb              %[res1],        5(%[dst])                          \n\t"
282       "sb              %[res2],        6(%[dst])                          \n\t"
283       "sb              %[res3],        7(%[dst])                          \n\t"
284 
285       "replv.ph        %[left0],       %[left0]                           \n\t"
286       "add             %[dst],          %[dst],             %[stride]     \n\t"
287 
288       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
289       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
290 
291       "sll             %[res2],        %[reshw],            16            \n\t"
292       "sra             %[res2],        %[res2],             16            \n\t"
293       "sra             %[res3],        %[reshw],            16            \n\t"
294 
295       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
296       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
297 
298       "sll             %[res0],        %[reshw],            16            \n\t"
299       "sra             %[res0],        %[res0],             16            \n\t"
300       "sra             %[res1],        %[reshw],            16            \n\t"
301 
302       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
303       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
304       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
305       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
306 
307       "sb              %[res0],        (%[dst])                           \n\t"
308       "sb              %[res1],        1(%[dst])                          \n\t"
309       "sb              %[res2],        2(%[dst])                          \n\t"
310       "sb              %[res3],        3(%[dst])                          \n\t"
311 
312       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
313       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
314 
315       "sll             %[res2],        %[reshw],            16            \n\t"
316       "sra             %[res2],        %[res2],             16            \n\t"
317       "sra             %[res3],        %[reshw],            16            \n\t"
318 
319       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
320       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
321 
322       "sll             %[res0],        %[reshw],            16            \n\t"
323       "sra             %[res0],        %[res0],             16            \n\t"
324       "sra             %[res1],        %[reshw],            16            \n\t"
325 
326       "lbu             %[left0],       3(%[left])                         \n\t"
327 
328       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
329       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
330       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
331       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
332 
333       "sb              %[res0],        4(%[dst])                          \n\t"
334       "sb              %[res1],        5(%[dst])                          \n\t"
335       "sb              %[res2],        6(%[dst])                          \n\t"
336       "sb              %[res3],        7(%[dst])                          \n\t"
337 
338       "replv.ph        %[left0],       %[left0]                           \n\t"
339       "add             %[dst],          %[dst],             %[stride]     \n\t"
340 
341       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
342       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
343 
344       "sll             %[res2],        %[reshw],            16            \n\t"
345       "sra             %[res2],        %[res2],             16            \n\t"
346       "sra             %[res3],        %[reshw],            16            \n\t"
347 
348       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
349       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
350 
351       "sll             %[res0],        %[reshw],            16            \n\t"
352       "sra             %[res0],        %[res0],             16            \n\t"
353       "sra             %[res1],        %[reshw],            16            \n\t"
354 
355       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
356       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
357       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
358       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
359 
360       "sb              %[res0],        (%[dst])                           \n\t"
361       "sb              %[res1],        1(%[dst])                          \n\t"
362       "sb              %[res2],        2(%[dst])                          \n\t"
363       "sb              %[res3],        3(%[dst])                          \n\t"
364 
365       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
366       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
367 
368       "sll             %[res2],        %[reshw],            16            \n\t"
369       "sra             %[res2],        %[res2],             16            \n\t"
370       "sra             %[res3],        %[reshw],            16            \n\t"
371 
372       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
373       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
374 
375       "sll             %[res0],        %[reshw],            16            \n\t"
376       "sra             %[res0],        %[res0],             16            \n\t"
377       "sra             %[res1],        %[reshw],            16            \n\t"
378 
379       "lbu             %[left0],       4(%[left])                         \n\t"
380 
381       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
382       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
383       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
384       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
385 
386       "sb              %[res0],        4(%[dst])                          \n\t"
387       "sb              %[res1],        5(%[dst])                          \n\t"
388       "sb              %[res2],        6(%[dst])                          \n\t"
389       "sb              %[res3],        7(%[dst])                          \n\t"
390 
391       "replv.ph        %[left0],       %[left0]                           \n\t"
392       "add             %[dst],          %[dst],             %[stride]     \n\t"
393 
394       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
395       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
396 
397       "sll             %[res2],        %[reshw],            16            \n\t"
398       "sra             %[res2],        %[res2],             16            \n\t"
399       "sra             %[res3],        %[reshw],            16            \n\t"
400 
401       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
402       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
403 
404       "sll             %[res0],        %[reshw],            16            \n\t"
405       "sra             %[res0],        %[res0],             16            \n\t"
406       "sra             %[res1],        %[reshw],            16            \n\t"
407 
408       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
409       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
410       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
411       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
412 
413       "sb              %[res0],        (%[dst])                           \n\t"
414       "sb              %[res1],        1(%[dst])                          \n\t"
415       "sb              %[res2],        2(%[dst])                          \n\t"
416       "sb              %[res3],        3(%[dst])                          \n\t"
417 
418       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
419       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
420 
421       "sll             %[res2],        %[reshw],            16            \n\t"
422       "sra             %[res2],        %[res2],             16            \n\t"
423       "sra             %[res3],        %[reshw],            16            \n\t"
424 
425       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
426       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
427 
428       "sll             %[res0],        %[reshw],            16            \n\t"
429       "sra             %[res0],        %[res0],             16            \n\t"
430       "sra             %[res1],        %[reshw],            16            \n\t"
431 
432       "lbu             %[left0],       5(%[left])                         \n\t"
433 
434       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
435       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
436       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
437       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
438 
439       "sb              %[res0],        4(%[dst])                          \n\t"
440       "sb              %[res1],        5(%[dst])                          \n\t"
441       "sb              %[res2],        6(%[dst])                          \n\t"
442       "sb              %[res3],        7(%[dst])                          \n\t"
443 
444       "replv.ph        %[left0],       %[left0]                           \n\t"
445       "add             %[dst],          %[dst],             %[stride]     \n\t"
446 
447       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
448       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
449 
450       "sll             %[res2],        %[reshw],            16            \n\t"
451       "sra             %[res2],        %[res2],             16            \n\t"
452       "sra             %[res3],        %[reshw],            16            \n\t"
453 
454       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
455       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
456 
457       "sll             %[res0],        %[reshw],            16            \n\t"
458       "sra             %[res0],        %[res0],             16            \n\t"
459       "sra             %[res1],        %[reshw],            16            \n\t"
460 
461       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
462       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
463       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
464       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
465 
466       "sb              %[res0],        (%[dst])                           \n\t"
467       "sb              %[res1],        1(%[dst])                          \n\t"
468       "sb              %[res2],        2(%[dst])                          \n\t"
469       "sb              %[res3],        3(%[dst])                          \n\t"
470 
471       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
472       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
473 
474       "sll             %[res2],        %[reshw],            16            \n\t"
475       "sra             %[res2],        %[res2],             16            \n\t"
476       "sra             %[res3],        %[reshw],            16            \n\t"
477 
478       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
479       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
480 
481       "sll             %[res0],        %[reshw],            16            \n\t"
482       "sra             %[res0],        %[res0],             16            \n\t"
483       "sra             %[res1],        %[reshw],            16            \n\t"
484 
485       "lbu             %[left0],       6(%[left])                         \n\t"
486 
487       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
488       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
489       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
490       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
491 
492       "sb              %[res0],        4(%[dst])                          \n\t"
493       "sb              %[res1],        5(%[dst])                          \n\t"
494       "sb              %[res2],        6(%[dst])                          \n\t"
495       "sb              %[res3],        7(%[dst])                          \n\t"
496 
497       "replv.ph        %[left0],       %[left0]                           \n\t"
498       "add             %[dst],          %[dst],             %[stride]     \n\t"
499 
500       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
501       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
502 
503       "sll             %[res2],        %[reshw],            16            \n\t"
504       "sra             %[res2],        %[res2],             16            \n\t"
505       "sra             %[res3],        %[reshw],            16            \n\t"
506 
507       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
508       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
509 
510       "sll             %[res0],        %[reshw],            16            \n\t"
511       "sra             %[res0],        %[res0],             16            \n\t"
512       "sra             %[res1],        %[reshw],            16            \n\t"
513 
514       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
515       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
516       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
517       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
518 
519       "sb              %[res0],        (%[dst])                           \n\t"
520       "sb              %[res1],        1(%[dst])                          \n\t"
521       "sb              %[res2],        2(%[dst])                          \n\t"
522       "sb              %[res3],        3(%[dst])                          \n\t"
523 
524       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
525       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
526 
527       "sll             %[res2],        %[reshw],            16            \n\t"
528       "sra             %[res2],        %[res2],             16            \n\t"
529       "sra             %[res3],        %[reshw],            16            \n\t"
530 
531       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
532       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
533 
534       "sll             %[res0],        %[reshw],            16            \n\t"
535       "sra             %[res0],        %[res0],             16            \n\t"
536       "sra             %[res1],        %[reshw],            16            \n\t"
537 
538       "lbu             %[left0],       7(%[left])                         \n\t"
539 
540       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
541       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
542       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
543       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
544 
545       "sb              %[res0],        4(%[dst])                          \n\t"
546       "sb              %[res1],        5(%[dst])                          \n\t"
547       "sb              %[res2],        6(%[dst])                          \n\t"
548       "sb              %[res3],        7(%[dst])                          \n\t"
549 
550       "replv.ph        %[left0],       %[left0]                           \n\t"
551       "add             %[dst],          %[dst],             %[stride]     \n\t"
552 
553       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
554       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
555 
556       "sll             %[res2],        %[reshw],            16            \n\t"
557       "sra             %[res2],        %[res2],             16            \n\t"
558       "sra             %[res3],        %[reshw],            16            \n\t"
559 
560       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
561       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
562 
563       "sll             %[res0],        %[reshw],            16            \n\t"
564       "sra             %[res0],        %[res0],             16            \n\t"
565       "sra             %[res1],        %[reshw],            16            \n\t"
566 
567       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
568       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
569       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
570       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
571 
572       "sb              %[res0],        (%[dst])                           \n\t"
573       "sb              %[res1],        1(%[dst])                          \n\t"
574       "sb              %[res2],        2(%[dst])                          \n\t"
575       "sb              %[res3],        3(%[dst])                          \n\t"
576 
577       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
578       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
579 
580       "sll             %[res2],        %[reshw],            16            \n\t"
581       "sra             %[res2],        %[res2],             16            \n\t"
582       "sra             %[res3],        %[reshw],            16            \n\t"
583 
584       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
585       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
586 
587       "sll             %[res0],        %[reshw],            16            \n\t"
588       "sra             %[res0],        %[res0],             16            \n\t"
589       "sra             %[res1],        %[reshw],            16            \n\t"
590 
591       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
592       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
593       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
594       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
595 
596       "sb              %[res0],        4(%[dst])                          \n\t"
597       "sb              %[res1],        5(%[dst])                          \n\t"
598       "sb              %[res2],        6(%[dst])                          \n\t"
599       "sb              %[res3],        7(%[dst])                          \n\t"
600 
601       : [abovel] "=&r" (abovel), [abover] "=&r" (abover),
602         [abovel_1] "=&r" (abovel_1), [abover_1] "=&r" (abover_1),
603         [left0] "=&r" (left0), [res2] "=&r" (res2), [res3] "=&r" (res3),
604         [res0] "=&r" (res0), [res1] "=&r" (res1),
605         [reshw] "=&r" (reshw), [top_left] "=&r" (top_left)
606       : [above] "r" (above), [left] "r" (left),
607         [dst] "r" (dst), [stride] "r" (stride), [cm] "r" (cm)
608   );
609 }
610 #endif  // #if HAVE_DSPR2
611