1 /*
2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "vpx_dsp/mips/common_dspr2.h"
12 
13 #if HAVE_DSPR2
vpx_h_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)14 void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
15                                const uint8_t *above, const uint8_t *left) {
16   int32_t  tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
17 
18   __asm__ __volatile__ (
19       "lb         %[tmp1],      (%[left])                   \n\t"
20       "lb         %[tmp2],      1(%[left])                  \n\t"
21       "lb         %[tmp3],      2(%[left])                  \n\t"
22       "lb         %[tmp4],      3(%[left])                  \n\t"
23       "lb         %[tmp5],      4(%[left])                  \n\t"
24       "lb         %[tmp6],      5(%[left])                  \n\t"
25       "lb         %[tmp7],      6(%[left])                  \n\t"
26       "lb         %[tmp8],      7(%[left])                  \n\t"
27 
28       "replv.qb   %[tmp1],      %[tmp1]                     \n\t"
29       "replv.qb   %[tmp2],      %[tmp2]                     \n\t"
30       "replv.qb   %[tmp3],      %[tmp3]                     \n\t"
31       "replv.qb   %[tmp4],      %[tmp4]                     \n\t"
32       "replv.qb   %[tmp5],      %[tmp5]                     \n\t"
33       "replv.qb   %[tmp6],      %[tmp6]                     \n\t"
34       "replv.qb   %[tmp7],      %[tmp7]                     \n\t"
35       "replv.qb   %[tmp8],      %[tmp8]                     \n\t"
36 
37       "sw         %[tmp1],      (%[dst])                    \n\t"
38       "sw         %[tmp1],      4(%[dst])                   \n\t"
39       "add        %[dst],       %[dst],         %[stride]   \n\t"
40       "sw         %[tmp2],      (%[dst])                    \n\t"
41       "sw         %[tmp2],      4(%[dst])                   \n\t"
42       "add        %[dst],       %[dst],         %[stride]   \n\t"
43       "sw         %[tmp3],      (%[dst])                    \n\t"
44       "sw         %[tmp3],      4(%[dst])                   \n\t"
45       "add        %[dst],       %[dst],         %[stride]   \n\t"
46       "sw         %[tmp4],      (%[dst])                    \n\t"
47       "sw         %[tmp4],      4(%[dst])                   \n\t"
48       "add        %[dst],       %[dst],         %[stride]   \n\t"
49       "sw         %[tmp5],      (%[dst])                    \n\t"
50       "sw         %[tmp5],      4(%[dst])                   \n\t"
51       "add        %[dst],       %[dst],         %[stride]   \n\t"
52       "sw         %[tmp6],      (%[dst])                    \n\t"
53       "sw         %[tmp6],      4(%[dst])                   \n\t"
54       "add        %[dst],       %[dst],         %[stride]   \n\t"
55       "sw         %[tmp7],      (%[dst])                    \n\t"
56       "sw         %[tmp7],      4(%[dst])                   \n\t"
57       "add        %[dst],       %[dst],         %[stride]   \n\t"
58       "sw         %[tmp8],      (%[dst])                    \n\t"
59       "sw         %[tmp8],      4(%[dst])                   \n\t"
60 
61       : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
62         [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
63         [tmp5] "=&r" (tmp5), [tmp7] "=&r" (tmp7),
64         [tmp6] "=&r" (tmp6), [tmp8] "=&r" (tmp8)
65       : [left] "r" (left), [dst] "r" (dst),
66         [stride] "r" (stride)
67   );
68 }
69 
vpx_dc_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)70 void vpx_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
71                                 const uint8_t *above, const uint8_t *left) {
72   int32_t  expected_dc;
73   int32_t  average;
74   int32_t  tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
75   int32_t  above2, above_l2, above_r2, left2, left_r2, left_l2;
76 
77   __asm__ __volatile__ (
78       "lw              %[above1],         (%[above])                      \n\t"
79       "lw              %[above2],         4(%[above])                     \n\t"
80       "lw              %[left1],          (%[left])                       \n\t"
81       "lw              %[left2],          4(%[left])                      \n\t"
82 
83       "preceu.ph.qbl   %[above_l1],       %[above1]                       \n\t"
84       "preceu.ph.qbr   %[above_r1],       %[above1]                       \n\t"
85       "preceu.ph.qbl   %[left_l1],        %[left1]                        \n\t"
86       "preceu.ph.qbr   %[left_r1],        %[left1]                        \n\t"
87 
88       "preceu.ph.qbl   %[above_l2],       %[above2]                       \n\t"
89       "preceu.ph.qbr   %[above_r2],       %[above2]                       \n\t"
90       "preceu.ph.qbl   %[left_l2],        %[left2]                        \n\t"
91       "preceu.ph.qbr   %[left_r2],        %[left2]                        \n\t"
92 
93       "addu.ph         %[average],        %[above_r1],      %[above_l1]   \n\t"
94       "addu.ph         %[average],        %[average],       %[left_l1]    \n\t"
95       "addu.ph         %[average],        %[average],       %[left_r1]    \n\t"
96 
97       "addu.ph         %[average],        %[average],       %[above_l2]   \n\t"
98       "addu.ph         %[average],        %[average],       %[above_r2]   \n\t"
99       "addu.ph         %[average],        %[average],       %[left_l2]    \n\t"
100       "addu.ph         %[average],        %[average],       %[left_r2]    \n\t"
101 
102       "addiu           %[average],        %[average],       8             \n\t"
103 
104       "srl             %[tmp],            %[average],       16            \n\t"
105       "addu.ph         %[average],        %[tmp],           %[average]    \n\t"
106       "srl             %[expected_dc],    %[average],       4             \n\t"
107       "replv.qb        %[expected_dc],    %[expected_dc]                  \n\t"
108 
109       "sw              %[expected_dc],    (%[dst])                        \n\t"
110       "sw              %[expected_dc],    4(%[dst])                       \n\t"
111 
112       "add             %[dst],             %[dst],          %[stride]     \n\t"
113       "sw              %[expected_dc],    (%[dst])                        \n\t"
114       "sw              %[expected_dc],    4(%[dst])                       \n\t"
115 
116       "add             %[dst],             %[dst],          %[stride]     \n\t"
117       "sw              %[expected_dc],    (%[dst])                        \n\t"
118       "sw              %[expected_dc],    4(%[dst])                       \n\t"
119 
120       "add             %[dst],             %[dst],          %[stride]     \n\t"
121       "sw              %[expected_dc],    (%[dst])                        \n\t"
122       "sw              %[expected_dc],    4(%[dst])                       \n\t"
123 
124       "add             %[dst],             %[dst],          %[stride]     \n\t"
125       "sw              %[expected_dc],    (%[dst])                        \n\t"
126       "sw              %[expected_dc],    4(%[dst])                       \n\t"
127 
128       "add             %[dst],             %[dst],          %[stride]     \n\t"
129       "sw              %[expected_dc],    (%[dst])                        \n\t"
130       "sw              %[expected_dc],    4(%[dst])                       \n\t"
131 
132       "add             %[dst],             %[dst],          %[stride]     \n\t"
133       "sw              %[expected_dc],    (%[dst])                        \n\t"
134       "sw              %[expected_dc],    4(%[dst])                       \n\t"
135 
136       "add             %[dst],             %[dst],          %[stride]     \n\t"
137       "sw              %[expected_dc],    (%[dst])                        \n\t"
138       "sw              %[expected_dc],    4(%[dst])                       \n\t"
139 
140       : [above1] "=&r" (above1), [above_l1] "=&r" (above_l1),
141         [above_r1] "=&r" (above_r1), [left1] "=&r" (left1),
142         [left_l1] "=&r" (left_l1), [left_r1] "=&r" (left_r1),
143         [above2] "=&r" (above2), [above_l2] "=&r" (above_l2),
144         [above_r2] "=&r" (above_r2), [left2] "=&r" (left2),
145         [left_l2] "=&r" (left_l2), [left_r2] "=&r" (left_r2),
146         [average] "=&r" (average), [tmp] "=&r" (tmp),
147         [expected_dc] "=&r" (expected_dc)
148       : [above] "r" (above), [left] "r" (left), [dst] "r" (dst),
149         [stride] "r" (stride)
150   );
151 }
152 
vpx_tm_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)153 void vpx_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
154                                 const uint8_t *above, const uint8_t *left) {
155   int32_t   abovel, abover;
156   int32_t   abovel_1, abover_1;
157   int32_t   left0;
158   int32_t   res0, res1, res2, res3;
159   int32_t   reshw;
160   int32_t   top_left;
161   uint8_t   *cm = vpx_ff_cropTbl;
162 
163   __asm__ __volatile__ (
164       "ulw             %[reshw],       (%[above])                         \n\t"
165       "ulw             %[top_left],    4(%[above])                        \n\t"
166 
167       "lbu             %[left0],       (%[left])                          \n\t"
168 
169       "preceu.ph.qbl   %[abovel],      %[reshw]                           \n\t"
170       "preceu.ph.qbr   %[abover],      %[reshw]                           \n\t"
171       "preceu.ph.qbl   %[abovel_1],    %[top_left]                        \n\t"
172       "preceu.ph.qbr   %[abover_1],    %[top_left]                        \n\t"
173 
174       "lbu             %[top_left],    -1(%[above])                       \n\t"
175       "replv.ph        %[left0],       %[left0]                           \n\t"
176 
177       "replv.ph        %[top_left],    %[top_left]                        \n\t"
178 
179       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
180       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
181 
182       "sll             %[res2],        %[reshw],            16            \n\t"
183       "sra             %[res2],        %[res2],             16            \n\t"
184       "sra             %[res3],        %[reshw],            16            \n\t"
185 
186       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
187       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
188 
189       "sll             %[res0],        %[reshw],            16            \n\t"
190       "sra             %[res0],        %[res0],             16            \n\t"
191       "sra             %[res1],        %[reshw],            16            \n\t"
192 
193       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
194       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
195       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
196       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
197 
198       "sb              %[res0],        (%[dst])                           \n\t"
199       "sb              %[res1],        1(%[dst])                          \n\t"
200       "sb              %[res2],        2(%[dst])                          \n\t"
201       "sb              %[res3],        3(%[dst])                          \n\t"
202 
203       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
204       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
205 
206       "sll             %[res2],        %[reshw],            16            \n\t"
207       "sra             %[res2],        %[res2],             16            \n\t"
208       "sra             %[res3],        %[reshw],            16            \n\t"
209 
210       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
211       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
212 
213       "sll             %[res0],        %[reshw],            16            \n\t"
214       "sra             %[res0],        %[res0],             16            \n\t"
215       "sra             %[res1],        %[reshw],            16            \n\t"
216 
217       "lbu             %[left0],       1(%[left])                         \n\t"
218 
219       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
220       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
221       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
222       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
223 
224       "sb              %[res0],        4(%[dst])                          \n\t"
225       "sb              %[res1],        5(%[dst])                          \n\t"
226       "sb              %[res2],        6(%[dst])                          \n\t"
227       "sb              %[res3],        7(%[dst])                          \n\t"
228 
229       "replv.ph        %[left0],       %[left0]                           \n\t"
230       "add             %[dst],          %[dst],             %[stride]     \n\t"
231 
232       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
233       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
234 
235       "sll             %[res2],        %[reshw],            16            \n\t"
236       "sra             %[res2],        %[res2],             16            \n\t"
237       "sra             %[res3],        %[reshw],            16            \n\t"
238 
239       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
240       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
241 
242       "sll             %[res0],        %[reshw],            16            \n\t"
243       "sra             %[res0],        %[res0],             16            \n\t"
244       "sra             %[res1],        %[reshw],            16            \n\t"
245 
246       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
247       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
248       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
249       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
250 
251       "sb              %[res0],        (%[dst])                           \n\t"
252       "sb              %[res1],        1(%[dst])                          \n\t"
253       "sb              %[res2],        2(%[dst])                          \n\t"
254       "sb              %[res3],        3(%[dst])                          \n\t"
255 
256       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
257       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
258 
259       "sll             %[res2],        %[reshw],            16            \n\t"
260       "sra             %[res2],        %[res2],             16            \n\t"
261       "sra             %[res3],        %[reshw],            16            \n\t"
262 
263       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
264       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
265 
266       "sll             %[res0],        %[reshw],            16            \n\t"
267       "sra             %[res0],        %[res0],             16            \n\t"
268       "sra             %[res1],        %[reshw],            16            \n\t"
269 
270       "lbu             %[left0],       2(%[left])                         \n\t"
271 
272       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
273       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
274       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
275       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
276 
277       "sb              %[res0],        4(%[dst])                          \n\t"
278       "sb              %[res1],        5(%[dst])                          \n\t"
279       "sb              %[res2],        6(%[dst])                          \n\t"
280       "sb              %[res3],        7(%[dst])                          \n\t"
281 
282       "replv.ph        %[left0],       %[left0]                           \n\t"
283       "add             %[dst],          %[dst],             %[stride]     \n\t"
284 
285       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
286       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
287 
288       "sll             %[res2],        %[reshw],            16            \n\t"
289       "sra             %[res2],        %[res2],             16            \n\t"
290       "sra             %[res3],        %[reshw],            16            \n\t"
291 
292       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
293       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
294 
295       "sll             %[res0],        %[reshw],            16            \n\t"
296       "sra             %[res0],        %[res0],             16            \n\t"
297       "sra             %[res1],        %[reshw],            16            \n\t"
298 
299       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
300       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
301       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
302       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
303 
304       "sb              %[res0],        (%[dst])                           \n\t"
305       "sb              %[res1],        1(%[dst])                          \n\t"
306       "sb              %[res2],        2(%[dst])                          \n\t"
307       "sb              %[res3],        3(%[dst])                          \n\t"
308 
309       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
310       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
311 
312       "sll             %[res2],        %[reshw],            16            \n\t"
313       "sra             %[res2],        %[res2],             16            \n\t"
314       "sra             %[res3],        %[reshw],            16            \n\t"
315 
316       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
317       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
318 
319       "sll             %[res0],        %[reshw],            16            \n\t"
320       "sra             %[res0],        %[res0],             16            \n\t"
321       "sra             %[res1],        %[reshw],            16            \n\t"
322 
323       "lbu             %[left0],       3(%[left])                         \n\t"
324 
325       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
326       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
327       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
328       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
329 
330       "sb              %[res0],        4(%[dst])                          \n\t"
331       "sb              %[res1],        5(%[dst])                          \n\t"
332       "sb              %[res2],        6(%[dst])                          \n\t"
333       "sb              %[res3],        7(%[dst])                          \n\t"
334 
335       "replv.ph        %[left0],       %[left0]                           \n\t"
336       "add             %[dst],          %[dst],             %[stride]     \n\t"
337 
338       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
339       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
340 
341       "sll             %[res2],        %[reshw],            16            \n\t"
342       "sra             %[res2],        %[res2],             16            \n\t"
343       "sra             %[res3],        %[reshw],            16            \n\t"
344 
345       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
346       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
347 
348       "sll             %[res0],        %[reshw],            16            \n\t"
349       "sra             %[res0],        %[res0],             16            \n\t"
350       "sra             %[res1],        %[reshw],            16            \n\t"
351 
352       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
353       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
354       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
355       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
356 
357       "sb              %[res0],        (%[dst])                           \n\t"
358       "sb              %[res1],        1(%[dst])                          \n\t"
359       "sb              %[res2],        2(%[dst])                          \n\t"
360       "sb              %[res3],        3(%[dst])                          \n\t"
361 
362       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
363       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
364 
365       "sll             %[res2],        %[reshw],            16            \n\t"
366       "sra             %[res2],        %[res2],             16            \n\t"
367       "sra             %[res3],        %[reshw],            16            \n\t"
368 
369       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
370       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
371 
372       "sll             %[res0],        %[reshw],            16            \n\t"
373       "sra             %[res0],        %[res0],             16            \n\t"
374       "sra             %[res1],        %[reshw],            16            \n\t"
375 
376       "lbu             %[left0],       4(%[left])                         \n\t"
377 
378       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
379       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
380       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
381       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
382 
383       "sb              %[res0],        4(%[dst])                          \n\t"
384       "sb              %[res1],        5(%[dst])                          \n\t"
385       "sb              %[res2],        6(%[dst])                          \n\t"
386       "sb              %[res3],        7(%[dst])                          \n\t"
387 
388       "replv.ph        %[left0],       %[left0]                           \n\t"
389       "add             %[dst],          %[dst],             %[stride]     \n\t"
390 
391       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
392       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
393 
394       "sll             %[res2],        %[reshw],            16            \n\t"
395       "sra             %[res2],        %[res2],             16            \n\t"
396       "sra             %[res3],        %[reshw],            16            \n\t"
397 
398       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
399       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
400 
401       "sll             %[res0],        %[reshw],            16            \n\t"
402       "sra             %[res0],        %[res0],             16            \n\t"
403       "sra             %[res1],        %[reshw],            16            \n\t"
404 
405       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
406       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
407       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
408       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
409 
410       "sb              %[res0],        (%[dst])                           \n\t"
411       "sb              %[res1],        1(%[dst])                          \n\t"
412       "sb              %[res2],        2(%[dst])                          \n\t"
413       "sb              %[res3],        3(%[dst])                          \n\t"
414 
415       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
416       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
417 
418       "sll             %[res2],        %[reshw],            16            \n\t"
419       "sra             %[res2],        %[res2],             16            \n\t"
420       "sra             %[res3],        %[reshw],            16            \n\t"
421 
422       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
423       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
424 
425       "sll             %[res0],        %[reshw],            16            \n\t"
426       "sra             %[res0],        %[res0],             16            \n\t"
427       "sra             %[res1],        %[reshw],            16            \n\t"
428 
429       "lbu             %[left0],       5(%[left])                         \n\t"
430 
431       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
432       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
433       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
434       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
435 
436       "sb              %[res0],        4(%[dst])                          \n\t"
437       "sb              %[res1],        5(%[dst])                          \n\t"
438       "sb              %[res2],        6(%[dst])                          \n\t"
439       "sb              %[res3],        7(%[dst])                          \n\t"
440 
441       "replv.ph        %[left0],       %[left0]                           \n\t"
442       "add             %[dst],          %[dst],             %[stride]     \n\t"
443 
444       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
445       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
446 
447       "sll             %[res2],        %[reshw],            16            \n\t"
448       "sra             %[res2],        %[res2],             16            \n\t"
449       "sra             %[res3],        %[reshw],            16            \n\t"
450 
451       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
452       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
453 
454       "sll             %[res0],        %[reshw],            16            \n\t"
455       "sra             %[res0],        %[res0],             16            \n\t"
456       "sra             %[res1],        %[reshw],            16            \n\t"
457 
458       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
459       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
460       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
461       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
462 
463       "sb              %[res0],        (%[dst])                           \n\t"
464       "sb              %[res1],        1(%[dst])                          \n\t"
465       "sb              %[res2],        2(%[dst])                          \n\t"
466       "sb              %[res3],        3(%[dst])                          \n\t"
467 
468       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
469       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
470 
471       "sll             %[res2],        %[reshw],            16            \n\t"
472       "sra             %[res2],        %[res2],             16            \n\t"
473       "sra             %[res3],        %[reshw],            16            \n\t"
474 
475       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
476       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
477 
478       "sll             %[res0],        %[reshw],            16            \n\t"
479       "sra             %[res0],        %[res0],             16            \n\t"
480       "sra             %[res1],        %[reshw],            16            \n\t"
481 
482       "lbu             %[left0],       6(%[left])                         \n\t"
483 
484       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
485       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
486       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
487       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
488 
489       "sb              %[res0],        4(%[dst])                          \n\t"
490       "sb              %[res1],        5(%[dst])                          \n\t"
491       "sb              %[res2],        6(%[dst])                          \n\t"
492       "sb              %[res3],        7(%[dst])                          \n\t"
493 
494       "replv.ph        %[left0],       %[left0]                           \n\t"
495       "add             %[dst],          %[dst],             %[stride]     \n\t"
496 
497       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
498       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
499 
500       "sll             %[res2],        %[reshw],            16            \n\t"
501       "sra             %[res2],        %[res2],             16            \n\t"
502       "sra             %[res3],        %[reshw],            16            \n\t"
503 
504       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
505       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
506 
507       "sll             %[res0],        %[reshw],            16            \n\t"
508       "sra             %[res0],        %[res0],             16            \n\t"
509       "sra             %[res1],        %[reshw],            16            \n\t"
510 
511       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
512       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
513       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
514       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
515 
516       "sb              %[res0],        (%[dst])                           \n\t"
517       "sb              %[res1],        1(%[dst])                          \n\t"
518       "sb              %[res2],        2(%[dst])                          \n\t"
519       "sb              %[res3],        3(%[dst])                          \n\t"
520 
521       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
522       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
523 
524       "sll             %[res2],        %[reshw],            16            \n\t"
525       "sra             %[res2],        %[res2],             16            \n\t"
526       "sra             %[res3],        %[reshw],            16            \n\t"
527 
528       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
529       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
530 
531       "sll             %[res0],        %[reshw],            16            \n\t"
532       "sra             %[res0],        %[res0],             16            \n\t"
533       "sra             %[res1],        %[reshw],            16            \n\t"
534 
535       "lbu             %[left0],       7(%[left])                         \n\t"
536 
537       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
538       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
539       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
540       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
541 
542       "sb              %[res0],        4(%[dst])                          \n\t"
543       "sb              %[res1],        5(%[dst])                          \n\t"
544       "sb              %[res2],        6(%[dst])                          \n\t"
545       "sb              %[res3],        7(%[dst])                          \n\t"
546 
547       "replv.ph        %[left0],       %[left0]                           \n\t"
548       "add             %[dst],          %[dst],             %[stride]     \n\t"
549 
550       "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
551       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
552 
553       "sll             %[res2],        %[reshw],            16            \n\t"
554       "sra             %[res2],        %[res2],             16            \n\t"
555       "sra             %[res3],        %[reshw],            16            \n\t"
556 
557       "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
558       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
559 
560       "sll             %[res0],        %[reshw],            16            \n\t"
561       "sra             %[res0],        %[res0],             16            \n\t"
562       "sra             %[res1],        %[reshw],            16            \n\t"
563 
564       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
565       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
566       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
567       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
568 
569       "sb              %[res0],        (%[dst])                           \n\t"
570       "sb              %[res1],        1(%[dst])                          \n\t"
571       "sb              %[res2],        2(%[dst])                          \n\t"
572       "sb              %[res3],        3(%[dst])                          \n\t"
573 
574       "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
575       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
576 
577       "sll             %[res2],        %[reshw],            16            \n\t"
578       "sra             %[res2],        %[res2],             16            \n\t"
579       "sra             %[res3],        %[reshw],            16            \n\t"
580 
581       "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
582       "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
583 
584       "sll             %[res0],        %[reshw],            16            \n\t"
585       "sra             %[res0],        %[res0],             16            \n\t"
586       "sra             %[res1],        %[reshw],            16            \n\t"
587 
588       "lbux            %[res0],        %[res0](%[cm])                     \n\t"
589       "lbux            %[res1],        %[res1](%[cm])                     \n\t"
590       "lbux            %[res2],        %[res2](%[cm])                     \n\t"
591       "lbux            %[res3],        %[res3](%[cm])                     \n\t"
592 
593       "sb              %[res0],        4(%[dst])                          \n\t"
594       "sb              %[res1],        5(%[dst])                          \n\t"
595       "sb              %[res2],        6(%[dst])                          \n\t"
596       "sb              %[res3],        7(%[dst])                          \n\t"
597 
598       : [abovel] "=&r" (abovel), [abover] "=&r" (abover),
599         [abovel_1] "=&r" (abovel_1), [abover_1] "=&r" (abover_1),
600         [left0] "=&r" (left0), [res2] "=&r" (res2), [res3] "=&r" (res3),
601         [res0] "=&r" (res0), [res1] "=&r" (res1),
602         [reshw] "=&r" (reshw), [top_left] "=&r" (top_left)
603       : [above] "r" (above), [left] "r" (left),
604         [dst] "r" (dst), [stride] "r" (stride), [cm] "r" (cm)
605   );
606 }
607 #endif  // #if HAVE_DSPR2
608