1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_dsp/mips/common_dspr2.h"
12
13 #if HAVE_DSPR2
vpx_h_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)14 void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
15 const uint8_t *above, const uint8_t *left) {
16 int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
17
18 __asm__ __volatile__ (
19 "lb %[tmp1], (%[left]) \n\t"
20 "lb %[tmp2], 1(%[left]) \n\t"
21 "lb %[tmp3], 2(%[left]) \n\t"
22 "lb %[tmp4], 3(%[left]) \n\t"
23 "lb %[tmp5], 4(%[left]) \n\t"
24 "lb %[tmp6], 5(%[left]) \n\t"
25 "lb %[tmp7], 6(%[left]) \n\t"
26 "lb %[tmp8], 7(%[left]) \n\t"
27
28 "replv.qb %[tmp1], %[tmp1] \n\t"
29 "replv.qb %[tmp2], %[tmp2] \n\t"
30 "replv.qb %[tmp3], %[tmp3] \n\t"
31 "replv.qb %[tmp4], %[tmp4] \n\t"
32 "replv.qb %[tmp5], %[tmp5] \n\t"
33 "replv.qb %[tmp6], %[tmp6] \n\t"
34 "replv.qb %[tmp7], %[tmp7] \n\t"
35 "replv.qb %[tmp8], %[tmp8] \n\t"
36
37 "sw %[tmp1], (%[dst]) \n\t"
38 "sw %[tmp1], 4(%[dst]) \n\t"
39 "add %[dst], %[dst], %[stride] \n\t"
40 "sw %[tmp2], (%[dst]) \n\t"
41 "sw %[tmp2], 4(%[dst]) \n\t"
42 "add %[dst], %[dst], %[stride] \n\t"
43 "sw %[tmp3], (%[dst]) \n\t"
44 "sw %[tmp3], 4(%[dst]) \n\t"
45 "add %[dst], %[dst], %[stride] \n\t"
46 "sw %[tmp4], (%[dst]) \n\t"
47 "sw %[tmp4], 4(%[dst]) \n\t"
48 "add %[dst], %[dst], %[stride] \n\t"
49 "sw %[tmp5], (%[dst]) \n\t"
50 "sw %[tmp5], 4(%[dst]) \n\t"
51 "add %[dst], %[dst], %[stride] \n\t"
52 "sw %[tmp6], (%[dst]) \n\t"
53 "sw %[tmp6], 4(%[dst]) \n\t"
54 "add %[dst], %[dst], %[stride] \n\t"
55 "sw %[tmp7], (%[dst]) \n\t"
56 "sw %[tmp7], 4(%[dst]) \n\t"
57 "add %[dst], %[dst], %[stride] \n\t"
58 "sw %[tmp8], (%[dst]) \n\t"
59 "sw %[tmp8], 4(%[dst]) \n\t"
60
61 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
62 [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
63 [tmp5] "=&r" (tmp5), [tmp7] "=&r" (tmp7),
64 [tmp6] "=&r" (tmp6), [tmp8] "=&r" (tmp8)
65 : [left] "r" (left), [dst] "r" (dst),
66 [stride] "r" (stride)
67 );
68 }
69
vpx_dc_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)70 void vpx_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
71 const uint8_t *above, const uint8_t *left) {
72 int32_t expected_dc;
73 int32_t average;
74 int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
75 int32_t above2, above_l2, above_r2, left2, left_r2, left_l2;
76
77 __asm__ __volatile__ (
78 "lw %[above1], (%[above]) \n\t"
79 "lw %[above2], 4(%[above]) \n\t"
80 "lw %[left1], (%[left]) \n\t"
81 "lw %[left2], 4(%[left]) \n\t"
82
83 "preceu.ph.qbl %[above_l1], %[above1] \n\t"
84 "preceu.ph.qbr %[above_r1], %[above1] \n\t"
85 "preceu.ph.qbl %[left_l1], %[left1] \n\t"
86 "preceu.ph.qbr %[left_r1], %[left1] \n\t"
87
88 "preceu.ph.qbl %[above_l2], %[above2] \n\t"
89 "preceu.ph.qbr %[above_r2], %[above2] \n\t"
90 "preceu.ph.qbl %[left_l2], %[left2] \n\t"
91 "preceu.ph.qbr %[left_r2], %[left2] \n\t"
92
93 "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
94 "addu.ph %[average], %[average], %[left_l1] \n\t"
95 "addu.ph %[average], %[average], %[left_r1] \n\t"
96
97 "addu.ph %[average], %[average], %[above_l2] \n\t"
98 "addu.ph %[average], %[average], %[above_r2] \n\t"
99 "addu.ph %[average], %[average], %[left_l2] \n\t"
100 "addu.ph %[average], %[average], %[left_r2] \n\t"
101
102 "addiu %[average], %[average], 8 \n\t"
103
104 "srl %[tmp], %[average], 16 \n\t"
105 "addu.ph %[average], %[tmp], %[average] \n\t"
106 "srl %[expected_dc], %[average], 4 \n\t"
107 "replv.qb %[expected_dc], %[expected_dc] \n\t"
108
109 "sw %[expected_dc], (%[dst]) \n\t"
110 "sw %[expected_dc], 4(%[dst]) \n\t"
111
112 "add %[dst], %[dst], %[stride] \n\t"
113 "sw %[expected_dc], (%[dst]) \n\t"
114 "sw %[expected_dc], 4(%[dst]) \n\t"
115
116 "add %[dst], %[dst], %[stride] \n\t"
117 "sw %[expected_dc], (%[dst]) \n\t"
118 "sw %[expected_dc], 4(%[dst]) \n\t"
119
120 "add %[dst], %[dst], %[stride] \n\t"
121 "sw %[expected_dc], (%[dst]) \n\t"
122 "sw %[expected_dc], 4(%[dst]) \n\t"
123
124 "add %[dst], %[dst], %[stride] \n\t"
125 "sw %[expected_dc], (%[dst]) \n\t"
126 "sw %[expected_dc], 4(%[dst]) \n\t"
127
128 "add %[dst], %[dst], %[stride] \n\t"
129 "sw %[expected_dc], (%[dst]) \n\t"
130 "sw %[expected_dc], 4(%[dst]) \n\t"
131
132 "add %[dst], %[dst], %[stride] \n\t"
133 "sw %[expected_dc], (%[dst]) \n\t"
134 "sw %[expected_dc], 4(%[dst]) \n\t"
135
136 "add %[dst], %[dst], %[stride] \n\t"
137 "sw %[expected_dc], (%[dst]) \n\t"
138 "sw %[expected_dc], 4(%[dst]) \n\t"
139
140 : [above1] "=&r" (above1), [above_l1] "=&r" (above_l1),
141 [above_r1] "=&r" (above_r1), [left1] "=&r" (left1),
142 [left_l1] "=&r" (left_l1), [left_r1] "=&r" (left_r1),
143 [above2] "=&r" (above2), [above_l2] "=&r" (above_l2),
144 [above_r2] "=&r" (above_r2), [left2] "=&r" (left2),
145 [left_l2] "=&r" (left_l2), [left_r2] "=&r" (left_r2),
146 [average] "=&r" (average), [tmp] "=&r" (tmp),
147 [expected_dc] "=&r" (expected_dc)
148 : [above] "r" (above), [left] "r" (left), [dst] "r" (dst),
149 [stride] "r" (stride)
150 );
151 }
152
vpx_tm_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)153 void vpx_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
154 const uint8_t *above, const uint8_t *left) {
155 int32_t abovel, abover;
156 int32_t abovel_1, abover_1;
157 int32_t left0;
158 int32_t res0, res1, res2, res3;
159 int32_t reshw;
160 int32_t top_left;
161 uint8_t *cm = vpx_ff_cropTbl;
162
163 __asm__ __volatile__ (
164 "ulw %[reshw], (%[above]) \n\t"
165 "ulw %[top_left], 4(%[above]) \n\t"
166
167 "lbu %[left0], (%[left]) \n\t"
168
169 "preceu.ph.qbl %[abovel], %[reshw] \n\t"
170 "preceu.ph.qbr %[abover], %[reshw] \n\t"
171 "preceu.ph.qbl %[abovel_1], %[top_left] \n\t"
172 "preceu.ph.qbr %[abover_1], %[top_left] \n\t"
173
174 "lbu %[top_left], -1(%[above]) \n\t"
175 "replv.ph %[left0], %[left0] \n\t"
176
177 "replv.ph %[top_left], %[top_left] \n\t"
178
179 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
180 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
181
182 "sll %[res2], %[reshw], 16 \n\t"
183 "sra %[res2], %[res2], 16 \n\t"
184 "sra %[res3], %[reshw], 16 \n\t"
185
186 "addu.ph %[reshw], %[abover], %[left0] \n\t"
187 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
188
189 "sll %[res0], %[reshw], 16 \n\t"
190 "sra %[res0], %[res0], 16 \n\t"
191 "sra %[res1], %[reshw], 16 \n\t"
192
193 "lbux %[res0], %[res0](%[cm]) \n\t"
194 "lbux %[res1], %[res1](%[cm]) \n\t"
195 "lbux %[res2], %[res2](%[cm]) \n\t"
196 "lbux %[res3], %[res3](%[cm]) \n\t"
197
198 "sb %[res0], (%[dst]) \n\t"
199 "sb %[res1], 1(%[dst]) \n\t"
200 "sb %[res2], 2(%[dst]) \n\t"
201 "sb %[res3], 3(%[dst]) \n\t"
202
203 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
204 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
205
206 "sll %[res2], %[reshw], 16 \n\t"
207 "sra %[res2], %[res2], 16 \n\t"
208 "sra %[res3], %[reshw], 16 \n\t"
209
210 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
211 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
212
213 "sll %[res0], %[reshw], 16 \n\t"
214 "sra %[res0], %[res0], 16 \n\t"
215 "sra %[res1], %[reshw], 16 \n\t"
216
217 "lbu %[left0], 1(%[left]) \n\t"
218
219 "lbux %[res0], %[res0](%[cm]) \n\t"
220 "lbux %[res1], %[res1](%[cm]) \n\t"
221 "lbux %[res2], %[res2](%[cm]) \n\t"
222 "lbux %[res3], %[res3](%[cm]) \n\t"
223
224 "sb %[res0], 4(%[dst]) \n\t"
225 "sb %[res1], 5(%[dst]) \n\t"
226 "sb %[res2], 6(%[dst]) \n\t"
227 "sb %[res3], 7(%[dst]) \n\t"
228
229 "replv.ph %[left0], %[left0] \n\t"
230 "add %[dst], %[dst], %[stride] \n\t"
231
232 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
233 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
234
235 "sll %[res2], %[reshw], 16 \n\t"
236 "sra %[res2], %[res2], 16 \n\t"
237 "sra %[res3], %[reshw], 16 \n\t"
238
239 "addu.ph %[reshw], %[abover], %[left0] \n\t"
240 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
241
242 "sll %[res0], %[reshw], 16 \n\t"
243 "sra %[res0], %[res0], 16 \n\t"
244 "sra %[res1], %[reshw], 16 \n\t"
245
246 "lbux %[res0], %[res0](%[cm]) \n\t"
247 "lbux %[res1], %[res1](%[cm]) \n\t"
248 "lbux %[res2], %[res2](%[cm]) \n\t"
249 "lbux %[res3], %[res3](%[cm]) \n\t"
250
251 "sb %[res0], (%[dst]) \n\t"
252 "sb %[res1], 1(%[dst]) \n\t"
253 "sb %[res2], 2(%[dst]) \n\t"
254 "sb %[res3], 3(%[dst]) \n\t"
255
256 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
257 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
258
259 "sll %[res2], %[reshw], 16 \n\t"
260 "sra %[res2], %[res2], 16 \n\t"
261 "sra %[res3], %[reshw], 16 \n\t"
262
263 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
264 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
265
266 "sll %[res0], %[reshw], 16 \n\t"
267 "sra %[res0], %[res0], 16 \n\t"
268 "sra %[res1], %[reshw], 16 \n\t"
269
270 "lbu %[left0], 2(%[left]) \n\t"
271
272 "lbux %[res0], %[res0](%[cm]) \n\t"
273 "lbux %[res1], %[res1](%[cm]) \n\t"
274 "lbux %[res2], %[res2](%[cm]) \n\t"
275 "lbux %[res3], %[res3](%[cm]) \n\t"
276
277 "sb %[res0], 4(%[dst]) \n\t"
278 "sb %[res1], 5(%[dst]) \n\t"
279 "sb %[res2], 6(%[dst]) \n\t"
280 "sb %[res3], 7(%[dst]) \n\t"
281
282 "replv.ph %[left0], %[left0] \n\t"
283 "add %[dst], %[dst], %[stride] \n\t"
284
285 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
286 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
287
288 "sll %[res2], %[reshw], 16 \n\t"
289 "sra %[res2], %[res2], 16 \n\t"
290 "sra %[res3], %[reshw], 16 \n\t"
291
292 "addu.ph %[reshw], %[abover], %[left0] \n\t"
293 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
294
295 "sll %[res0], %[reshw], 16 \n\t"
296 "sra %[res0], %[res0], 16 \n\t"
297 "sra %[res1], %[reshw], 16 \n\t"
298
299 "lbux %[res0], %[res0](%[cm]) \n\t"
300 "lbux %[res1], %[res1](%[cm]) \n\t"
301 "lbux %[res2], %[res2](%[cm]) \n\t"
302 "lbux %[res3], %[res3](%[cm]) \n\t"
303
304 "sb %[res0], (%[dst]) \n\t"
305 "sb %[res1], 1(%[dst]) \n\t"
306 "sb %[res2], 2(%[dst]) \n\t"
307 "sb %[res3], 3(%[dst]) \n\t"
308
309 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
310 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
311
312 "sll %[res2], %[reshw], 16 \n\t"
313 "sra %[res2], %[res2], 16 \n\t"
314 "sra %[res3], %[reshw], 16 \n\t"
315
316 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
317 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
318
319 "sll %[res0], %[reshw], 16 \n\t"
320 "sra %[res0], %[res0], 16 \n\t"
321 "sra %[res1], %[reshw], 16 \n\t"
322
323 "lbu %[left0], 3(%[left]) \n\t"
324
325 "lbux %[res0], %[res0](%[cm]) \n\t"
326 "lbux %[res1], %[res1](%[cm]) \n\t"
327 "lbux %[res2], %[res2](%[cm]) \n\t"
328 "lbux %[res3], %[res3](%[cm]) \n\t"
329
330 "sb %[res0], 4(%[dst]) \n\t"
331 "sb %[res1], 5(%[dst]) \n\t"
332 "sb %[res2], 6(%[dst]) \n\t"
333 "sb %[res3], 7(%[dst]) \n\t"
334
335 "replv.ph %[left0], %[left0] \n\t"
336 "add %[dst], %[dst], %[stride] \n\t"
337
338 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
339 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
340
341 "sll %[res2], %[reshw], 16 \n\t"
342 "sra %[res2], %[res2], 16 \n\t"
343 "sra %[res3], %[reshw], 16 \n\t"
344
345 "addu.ph %[reshw], %[abover], %[left0] \n\t"
346 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
347
348 "sll %[res0], %[reshw], 16 \n\t"
349 "sra %[res0], %[res0], 16 \n\t"
350 "sra %[res1], %[reshw], 16 \n\t"
351
352 "lbux %[res0], %[res0](%[cm]) \n\t"
353 "lbux %[res1], %[res1](%[cm]) \n\t"
354 "lbux %[res2], %[res2](%[cm]) \n\t"
355 "lbux %[res3], %[res3](%[cm]) \n\t"
356
357 "sb %[res0], (%[dst]) \n\t"
358 "sb %[res1], 1(%[dst]) \n\t"
359 "sb %[res2], 2(%[dst]) \n\t"
360 "sb %[res3], 3(%[dst]) \n\t"
361
362 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
363 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
364
365 "sll %[res2], %[reshw], 16 \n\t"
366 "sra %[res2], %[res2], 16 \n\t"
367 "sra %[res3], %[reshw], 16 \n\t"
368
369 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
370 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
371
372 "sll %[res0], %[reshw], 16 \n\t"
373 "sra %[res0], %[res0], 16 \n\t"
374 "sra %[res1], %[reshw], 16 \n\t"
375
376 "lbu %[left0], 4(%[left]) \n\t"
377
378 "lbux %[res0], %[res0](%[cm]) \n\t"
379 "lbux %[res1], %[res1](%[cm]) \n\t"
380 "lbux %[res2], %[res2](%[cm]) \n\t"
381 "lbux %[res3], %[res3](%[cm]) \n\t"
382
383 "sb %[res0], 4(%[dst]) \n\t"
384 "sb %[res1], 5(%[dst]) \n\t"
385 "sb %[res2], 6(%[dst]) \n\t"
386 "sb %[res3], 7(%[dst]) \n\t"
387
388 "replv.ph %[left0], %[left0] \n\t"
389 "add %[dst], %[dst], %[stride] \n\t"
390
391 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
392 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
393
394 "sll %[res2], %[reshw], 16 \n\t"
395 "sra %[res2], %[res2], 16 \n\t"
396 "sra %[res3], %[reshw], 16 \n\t"
397
398 "addu.ph %[reshw], %[abover], %[left0] \n\t"
399 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
400
401 "sll %[res0], %[reshw], 16 \n\t"
402 "sra %[res0], %[res0], 16 \n\t"
403 "sra %[res1], %[reshw], 16 \n\t"
404
405 "lbux %[res0], %[res0](%[cm]) \n\t"
406 "lbux %[res1], %[res1](%[cm]) \n\t"
407 "lbux %[res2], %[res2](%[cm]) \n\t"
408 "lbux %[res3], %[res3](%[cm]) \n\t"
409
410 "sb %[res0], (%[dst]) \n\t"
411 "sb %[res1], 1(%[dst]) \n\t"
412 "sb %[res2], 2(%[dst]) \n\t"
413 "sb %[res3], 3(%[dst]) \n\t"
414
415 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
416 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
417
418 "sll %[res2], %[reshw], 16 \n\t"
419 "sra %[res2], %[res2], 16 \n\t"
420 "sra %[res3], %[reshw], 16 \n\t"
421
422 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
423 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
424
425 "sll %[res0], %[reshw], 16 \n\t"
426 "sra %[res0], %[res0], 16 \n\t"
427 "sra %[res1], %[reshw], 16 \n\t"
428
429 "lbu %[left0], 5(%[left]) \n\t"
430
431 "lbux %[res0], %[res0](%[cm]) \n\t"
432 "lbux %[res1], %[res1](%[cm]) \n\t"
433 "lbux %[res2], %[res2](%[cm]) \n\t"
434 "lbux %[res3], %[res3](%[cm]) \n\t"
435
436 "sb %[res0], 4(%[dst]) \n\t"
437 "sb %[res1], 5(%[dst]) \n\t"
438 "sb %[res2], 6(%[dst]) \n\t"
439 "sb %[res3], 7(%[dst]) \n\t"
440
441 "replv.ph %[left0], %[left0] \n\t"
442 "add %[dst], %[dst], %[stride] \n\t"
443
444 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
445 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
446
447 "sll %[res2], %[reshw], 16 \n\t"
448 "sra %[res2], %[res2], 16 \n\t"
449 "sra %[res3], %[reshw], 16 \n\t"
450
451 "addu.ph %[reshw], %[abover], %[left0] \n\t"
452 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
453
454 "sll %[res0], %[reshw], 16 \n\t"
455 "sra %[res0], %[res0], 16 \n\t"
456 "sra %[res1], %[reshw], 16 \n\t"
457
458 "lbux %[res0], %[res0](%[cm]) \n\t"
459 "lbux %[res1], %[res1](%[cm]) \n\t"
460 "lbux %[res2], %[res2](%[cm]) \n\t"
461 "lbux %[res3], %[res3](%[cm]) \n\t"
462
463 "sb %[res0], (%[dst]) \n\t"
464 "sb %[res1], 1(%[dst]) \n\t"
465 "sb %[res2], 2(%[dst]) \n\t"
466 "sb %[res3], 3(%[dst]) \n\t"
467
468 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
469 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
470
471 "sll %[res2], %[reshw], 16 \n\t"
472 "sra %[res2], %[res2], 16 \n\t"
473 "sra %[res3], %[reshw], 16 \n\t"
474
475 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
476 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
477
478 "sll %[res0], %[reshw], 16 \n\t"
479 "sra %[res0], %[res0], 16 \n\t"
480 "sra %[res1], %[reshw], 16 \n\t"
481
482 "lbu %[left0], 6(%[left]) \n\t"
483
484 "lbux %[res0], %[res0](%[cm]) \n\t"
485 "lbux %[res1], %[res1](%[cm]) \n\t"
486 "lbux %[res2], %[res2](%[cm]) \n\t"
487 "lbux %[res3], %[res3](%[cm]) \n\t"
488
489 "sb %[res0], 4(%[dst]) \n\t"
490 "sb %[res1], 5(%[dst]) \n\t"
491 "sb %[res2], 6(%[dst]) \n\t"
492 "sb %[res3], 7(%[dst]) \n\t"
493
494 "replv.ph %[left0], %[left0] \n\t"
495 "add %[dst], %[dst], %[stride] \n\t"
496
497 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
498 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
499
500 "sll %[res2], %[reshw], 16 \n\t"
501 "sra %[res2], %[res2], 16 \n\t"
502 "sra %[res3], %[reshw], 16 \n\t"
503
504 "addu.ph %[reshw], %[abover], %[left0] \n\t"
505 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
506
507 "sll %[res0], %[reshw], 16 \n\t"
508 "sra %[res0], %[res0], 16 \n\t"
509 "sra %[res1], %[reshw], 16 \n\t"
510
511 "lbux %[res0], %[res0](%[cm]) \n\t"
512 "lbux %[res1], %[res1](%[cm]) \n\t"
513 "lbux %[res2], %[res2](%[cm]) \n\t"
514 "lbux %[res3], %[res3](%[cm]) \n\t"
515
516 "sb %[res0], (%[dst]) \n\t"
517 "sb %[res1], 1(%[dst]) \n\t"
518 "sb %[res2], 2(%[dst]) \n\t"
519 "sb %[res3], 3(%[dst]) \n\t"
520
521 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
522 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
523
524 "sll %[res2], %[reshw], 16 \n\t"
525 "sra %[res2], %[res2], 16 \n\t"
526 "sra %[res3], %[reshw], 16 \n\t"
527
528 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
529 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
530
531 "sll %[res0], %[reshw], 16 \n\t"
532 "sra %[res0], %[res0], 16 \n\t"
533 "sra %[res1], %[reshw], 16 \n\t"
534
535 "lbu %[left0], 7(%[left]) \n\t"
536
537 "lbux %[res0], %[res0](%[cm]) \n\t"
538 "lbux %[res1], %[res1](%[cm]) \n\t"
539 "lbux %[res2], %[res2](%[cm]) \n\t"
540 "lbux %[res3], %[res3](%[cm]) \n\t"
541
542 "sb %[res0], 4(%[dst]) \n\t"
543 "sb %[res1], 5(%[dst]) \n\t"
544 "sb %[res2], 6(%[dst]) \n\t"
545 "sb %[res3], 7(%[dst]) \n\t"
546
547 "replv.ph %[left0], %[left0] \n\t"
548 "add %[dst], %[dst], %[stride] \n\t"
549
550 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
551 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
552
553 "sll %[res2], %[reshw], 16 \n\t"
554 "sra %[res2], %[res2], 16 \n\t"
555 "sra %[res3], %[reshw], 16 \n\t"
556
557 "addu.ph %[reshw], %[abover], %[left0] \n\t"
558 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
559
560 "sll %[res0], %[reshw], 16 \n\t"
561 "sra %[res0], %[res0], 16 \n\t"
562 "sra %[res1], %[reshw], 16 \n\t"
563
564 "lbux %[res0], %[res0](%[cm]) \n\t"
565 "lbux %[res1], %[res1](%[cm]) \n\t"
566 "lbux %[res2], %[res2](%[cm]) \n\t"
567 "lbux %[res3], %[res3](%[cm]) \n\t"
568
569 "sb %[res0], (%[dst]) \n\t"
570 "sb %[res1], 1(%[dst]) \n\t"
571 "sb %[res2], 2(%[dst]) \n\t"
572 "sb %[res3], 3(%[dst]) \n\t"
573
574 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
575 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
576
577 "sll %[res2], %[reshw], 16 \n\t"
578 "sra %[res2], %[res2], 16 \n\t"
579 "sra %[res3], %[reshw], 16 \n\t"
580
581 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
582 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
583
584 "sll %[res0], %[reshw], 16 \n\t"
585 "sra %[res0], %[res0], 16 \n\t"
586 "sra %[res1], %[reshw], 16 \n\t"
587
588 "lbux %[res0], %[res0](%[cm]) \n\t"
589 "lbux %[res1], %[res1](%[cm]) \n\t"
590 "lbux %[res2], %[res2](%[cm]) \n\t"
591 "lbux %[res3], %[res3](%[cm]) \n\t"
592
593 "sb %[res0], 4(%[dst]) \n\t"
594 "sb %[res1], 5(%[dst]) \n\t"
595 "sb %[res2], 6(%[dst]) \n\t"
596 "sb %[res3], 7(%[dst]) \n\t"
597
598 : [abovel] "=&r" (abovel), [abover] "=&r" (abover),
599 [abovel_1] "=&r" (abovel_1), [abover_1] "=&r" (abover_1),
600 [left0] "=&r" (left0), [res2] "=&r" (res2), [res3] "=&r" (res3),
601 [res0] "=&r" (res0), [res1] "=&r" (res1),
602 [reshw] "=&r" (reshw), [top_left] "=&r" (top_left)
603 : [above] "r" (above), [left] "r" (left),
604 [dst] "r" (dst), [stride] "r" (stride), [cm] "r" (cm)
605 );
606 }
607 #endif // #if HAVE_DSPR2
608