1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15 #include "./dsp.h"
16
17 #if defined(WEBP_USE_MIPS_DSP_R2)
18
19 #include "./lossless.h"
20
// Generates a static palette-lookup routine named FUNC_NAME.
//
// The generated function walks rows [y_start, y_end); for each row it maps
// 'width' elements of type TYPE from 'src' to 'dst' through 'color_map',
// advancing both pointers as it goes.
//
// Groups of four elements are handled by MIPS DSP inline assembly. The
// assembler-level '.ifc' tests on the stringified TYPE select which
// instruction sequence is assembled:
//   - uint8_t : byte loads, lookup, bits 8..15 of each looked-up entry are
//               stored back as bytes.
//   - uint32_t: word loads, bits 8..15 of each source word are used as the
//               lookup index, the full looked-up word is stored.
// In both cases the index is scaled by 4 ('sll ..., 2') before the indexed
// word load ('lwx').
//
// The remaining (width & 3) elements of every row are mapped in C with
// GET_VALUE(color_map[GET_INDEX(element)]).
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
static void FUNC_NAME(const TYPE* src, \
                      const uint32_t* const color_map, \
                      TYPE* dst, int y_start, int y_end, \
                      int width) { \
  int row; \
  for (row = y_start; row < y_end; ++row) { \
    int quads_left; \
    int tail_left; \
    for (quads_left = width >> 2; quads_left > 0; --quads_left) { \
      int v1, v2, v3, v4; \
      __asm__ volatile ( \
        ".ifc " #TYPE ", uint8_t \n\t" \
        "lbu %[tmp1], 0(%[src]) \n\t" \
        "lbu %[tmp2], 1(%[src]) \n\t" \
        "lbu %[tmp3], 2(%[src]) \n\t" \
        "lbu %[tmp4], 3(%[src]) \n\t" \
        "addiu %[src], %[src], 4 \n\t" \
        ".endif \n\t" \
        ".ifc " #TYPE ", uint32_t \n\t" \
        "lw %[tmp1], 0(%[src]) \n\t" \
        "lw %[tmp2], 4(%[src]) \n\t" \
        "lw %[tmp3], 8(%[src]) \n\t" \
        "lw %[tmp4], 12(%[src]) \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "addiu %[src], %[src], 16 \n\t" \
        ".endif \n\t" \
        "sll %[tmp1], %[tmp1], 2 \n\t" \
        "sll %[tmp2], %[tmp2], 2 \n\t" \
        "sll %[tmp3], %[tmp3], 2 \n\t" \
        "sll %[tmp4], %[tmp4], 2 \n\t" \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
        ".ifc " #TYPE ", uint8_t \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "sb %[tmp1], 0(%[dst]) \n\t" \
        "sb %[tmp2], 1(%[dst]) \n\t" \
        "sb %[tmp3], 2(%[dst]) \n\t" \
        "sb %[tmp4], 3(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 4 \n\t" \
        ".endif \n\t" \
        ".ifc " #TYPE ", uint32_t \n\t" \
        "sw %[tmp1], 0(%[dst]) \n\t" \
        "sw %[tmp2], 4(%[dst]) \n\t" \
        "sw %[tmp3], 8(%[dst]) \n\t" \
        "sw %[tmp4], 12(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 16 \n\t" \
        ".endif \n\t" \
        : [tmp1]"=&r"(v1), [tmp2]"=&r"(v2), [tmp3]"=&r"(v3), \
          [tmp4]"=&r"(v4), [src]"+&r"(src), [dst]"+r"(dst) \
        : [color_map]"r"(color_map) \
        : "memory" \
      ); \
    } \
    for (tail_left = width & 3; tail_left > 0; --tail_left) { \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
    } \
  } \
}

MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
92
93 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
94 uint32_t c2) {
95 int temp0, temp1, temp2, temp3, temp4, temp5;
96 __asm__ volatile (
97 "preceu.ph.qbr %[temp1], %[c0] \n\t"
98 "preceu.ph.qbl %[temp2], %[c0] \n\t"
99 "preceu.ph.qbr %[temp3], %[c1] \n\t"
100 "preceu.ph.qbl %[temp4], %[c1] \n\t"
101 "preceu.ph.qbr %[temp5], %[c2] \n\t"
102 "preceu.ph.qbl %[temp0], %[c2] \n\t"
103 "subq.ph %[temp3], %[temp3], %[temp5] \n\t"
104 "subq.ph %[temp4], %[temp4], %[temp0] \n\t"
105 "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
106 "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
107 "shll_s.ph %[temp1], %[temp1], 7 \n\t"
108 "shll_s.ph %[temp2], %[temp2], 7 \n\t"
109 "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
110 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
111 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
112 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
113 : "memory"
114 );
115 return temp2;
116 }
117
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)118 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
119 uint32_t c2) {
120 int temp0, temp1, temp2, temp3, temp4, temp5;
121 __asm__ volatile (
122 "adduh.qb %[temp5], %[c0], %[c1] \n\t"
123 "preceu.ph.qbr %[temp3], %[c2] \n\t"
124 "preceu.ph.qbr %[temp1], %[temp5] \n\t"
125 "preceu.ph.qbl %[temp2], %[temp5] \n\t"
126 "preceu.ph.qbl %[temp4], %[c2] \n\t"
127 "subq.ph %[temp3], %[temp1], %[temp3] \n\t"
128 "subq.ph %[temp4], %[temp2], %[temp4] \n\t"
129 "shrl.ph %[temp5], %[temp3], 15 \n\t"
130 "shrl.ph %[temp0], %[temp4], 15 \n\t"
131 "addq.ph %[temp3], %[temp3], %[temp5] \n\t"
132 "addq.ph %[temp4], %[temp0], %[temp4] \n\t"
133 "shra.ph %[temp3], %[temp3], 1 \n\t"
134 "shra.ph %[temp4], %[temp4], 1 \n\t"
135 "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
136 "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
137 "shll_s.ph %[temp1], %[temp1], 7 \n\t"
138 "shll_s.ph %[temp2], %[temp2], 7 \n\t"
139 "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
140 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
141 [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
142 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
143 : "memory"
144 );
145 return temp1;
146 }
147
Select(uint32_t a,uint32_t b,uint32_t c)148 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
149 int temp0, temp1, temp2, temp3, temp4, temp5;
150 __asm__ volatile (
151 "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
152 "pick.qb %[temp1], %[b], %[c] \n\t"
153 "pick.qb %[temp2], %[c], %[b] \n\t"
154 "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
155 "pick.qb %[temp4], %[a], %[c] \n\t"
156 "pick.qb %[temp5], %[c], %[a] \n\t"
157 "subu.qb %[temp3], %[temp1], %[temp2] \n\t"
158 "subu.qb %[temp0], %[temp4], %[temp5] \n\t"
159 "raddu.w.qb %[temp3], %[temp3] \n\t"
160 "raddu.w.qb %[temp0], %[temp0] \n\t"
161 "subu %[temp3], %[temp3], %[temp0] \n\t"
162 "slti %[temp0], %[temp3], 0x1 \n\t"
163 "movz %[a], %[b], %[temp0] \n\t"
164 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
165 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
166 [a]"+&r"(a)
167 : [b]"r"(b), [c]"r"(c)
168 );
169 return a;
170 }
171
Average2(uint32_t a0,uint32_t a1)172 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
173 __asm__ volatile (
174 "adduh.qb %[a0], %[a0], %[a1] \n\t"
175 : [a0]"+r"(a0)
176 : [a1]"r"(a1)
177 );
178 return a0;
179 }
180
Average3(uint32_t a0,uint32_t a1,uint32_t a2)181 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
182 return Average2(Average2(a0, a2), a1);
183 }
184
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)185 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
186 uint32_t a2, uint32_t a3) {
187 return Average2(Average2(a0, a1), Average2(a2, a3));
188 }
189
// Predictor 5: Average3 of 'left', top[0] and top[1].
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  return Average3(left, t, tr);
}
193
// Predictor 6: Average2 of 'left' and top[-1].
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  const uint32_t tl = top[-1];
  return Average2(left, tl);
}
197
// Predictor 7: Average2 of 'left' and top[0].
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  const uint32_t t = top[0];
  return Average2(left, t);
}
201
// Predictor 8: Average2 of top[-1] and top[0]. 'left' is not used.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  const uint32_t tl = top[-1];
  const uint32_t t = top[0];
  (void)left;
  return Average2(tl, t);
}
206
// Predictor 9: Average2 of top[0] and top[1]. 'left' is not used.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  (void)left;
  return Average2(t, tr);
}
211
// Predictor 10: Average4 of 'left', top[-1], top[0] and top[1].
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  const uint32_t tl = top[-1];
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  return Average4(left, tl, t, tr);
}
215
// Predictor 11: Select() between top[0] and 'left', with top[-1] passed as
// the third (reference) argument.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return Select(t, left, tl);
}
219
// Predictor 12: ClampedAddSubtractFull(left, top[0], top[-1]).
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return ClampedAddSubtractFull(left, t, tl);
}
223
// Predictor 13: ClampedAddSubtractHalf(left, top[0], top[-1]).
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return ClampedAddSubtractHalf(left, t, tl);
}
227
228 // Add green to blue and red channels (i.e. perform the inverse transform of
229 // 'subtract green').
AddGreenToBlueAndRed(uint32_t * data,int num_pixels)230 static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
231 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
232 uint32_t* const p_loop1_end = data + (num_pixels & ~3);
233 uint32_t* const p_loop2_end = data + num_pixels;
234 __asm__ volatile (
235 ".set push \n\t"
236 ".set noreorder \n\t"
237 "beq %[data], %[p_loop1_end], 3f \n\t"
238 " nop \n\t"
239 "0: \n\t"
240 "lw %[temp0], 0(%[data]) \n\t"
241 "lw %[temp1], 4(%[data]) \n\t"
242 "lw %[temp2], 8(%[data]) \n\t"
243 "lw %[temp3], 12(%[data]) \n\t"
244 "ext %[temp4], %[temp0], 8, 8 \n\t"
245 "ext %[temp5], %[temp1], 8, 8 \n\t"
246 "ext %[temp6], %[temp2], 8, 8 \n\t"
247 "ext %[temp7], %[temp3], 8, 8 \n\t"
248 "addiu %[data], %[data], 16 \n\t"
249 "replv.ph %[temp4], %[temp4] \n\t"
250 "replv.ph %[temp5], %[temp5] \n\t"
251 "replv.ph %[temp6], %[temp6] \n\t"
252 "replv.ph %[temp7], %[temp7] \n\t"
253 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
254 "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
255 "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
256 "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
257 "sw %[temp0], -16(%[data]) \n\t"
258 "sw %[temp1], -12(%[data]) \n\t"
259 "sw %[temp2], -8(%[data]) \n\t"
260 "bne %[data], %[p_loop1_end], 0b \n\t"
261 " sw %[temp3], -4(%[data]) \n\t"
262 "3: \n\t"
263 "beq %[data], %[p_loop2_end], 2f \n\t"
264 " nop \n\t"
265 "1: \n\t"
266 "lw %[temp0], 0(%[data]) \n\t"
267 "addiu %[data], %[data], 4 \n\t"
268 "ext %[temp4], %[temp0], 8, 8 \n\t"
269 "replv.ph %[temp4], %[temp4] \n\t"
270 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
271 "bne %[data], %[p_loop2_end], 1b \n\t"
272 " sw %[temp0], -4(%[data]) \n\t"
273 "2: \n\t"
274 ".set pop \n\t"
275 : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
276 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
277 [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
278 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
279 : "memory"
280 );
281 }
282
TransformColorInverse(const VP8LMultipliers * const m,uint32_t * data,int num_pixels)283 static void TransformColorInverse(const VP8LMultipliers* const m,
284 uint32_t* data, int num_pixels) {
285 int temp0, temp1, temp2, temp3, temp4, temp5;
286 uint32_t argb, argb1, new_red;
287 const uint32_t G_to_R = m->green_to_red_;
288 const uint32_t G_to_B = m->green_to_blue_;
289 const uint32_t R_to_B = m->red_to_blue_;
290 uint32_t* const p_loop_end = data + (num_pixels & ~1);
291 __asm__ volatile (
292 ".set push \n\t"
293 ".set noreorder \n\t"
294 "beq %[data], %[p_loop_end], 1f \n\t"
295 " nop \n\t"
296 "replv.ph %[temp0], %[G_to_R] \n\t"
297 "replv.ph %[temp1], %[G_to_B] \n\t"
298 "replv.ph %[temp2], %[R_to_B] \n\t"
299 "shll.ph %[temp0], %[temp0], 8 \n\t"
300 "shll.ph %[temp1], %[temp1], 8 \n\t"
301 "shll.ph %[temp2], %[temp2], 8 \n\t"
302 "shra.ph %[temp0], %[temp0], 8 \n\t"
303 "shra.ph %[temp1], %[temp1], 8 \n\t"
304 "shra.ph %[temp2], %[temp2], 8 \n\t"
305 "0: \n\t"
306 "lw %[argb], 0(%[data]) \n\t"
307 "lw %[argb1], 4(%[data]) \n\t"
308 "addiu %[data], %[data], 8 \n\t"
309 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
310 "preceu.ph.qbra %[temp3], %[temp3] \n\t"
311 "shll.ph %[temp3], %[temp3], 8 \n\t"
312 "shra.ph %[temp3], %[temp3], 8 \n\t"
313 "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
314 "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
315 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
316 "ins %[argb1], %[argb], 16, 16 \n\t"
317 "shra.ph %[temp5], %[temp5], 5 \n\t"
318 "shra.ph %[temp3], %[temp3], 5 \n\t"
319 "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
320 "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
321 "preceu.ph.qbra %[temp5], %[new_red] \n\t"
322 "shll.ph %[temp4], %[temp5], 8 \n\t"
323 "shra.ph %[temp4], %[temp4], 8 \n\t"
324 "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
325 "sb %[temp5], -2(%[data]) \n\t"
326 "sra %[temp5], %[temp5], 16 \n\t"
327 "shra.ph %[temp4], %[temp4], 5 \n\t"
328 "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
329 "preceu.ph.qbra %[temp3], %[argb1] \n\t"
330 "sb %[temp5], -6(%[data]) \n\t"
331 "sb %[temp3], -4(%[data]) \n\t"
332 "sra %[temp3], %[temp3], 16 \n\t"
333 "bne %[data], %[p_loop_end], 0b \n\t"
334 " sb %[temp3], -8(%[data]) \n\t"
335 "1: \n\t"
336 ".set pop \n\t"
337 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
338 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
339 [new_red]"=&r"(new_red), [argb]"=&r"(argb),
340 [argb1]"=&r"(argb1), [data]"+&r"(data)
341 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
342 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
343 : "memory", "hi", "lo"
344 );
345
346 // Fall-back to C-version for left-overs.
347 if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
348 }
349
ConvertBGRAToRGB(const uint32_t * src,int num_pixels,uint8_t * dst)350 static void ConvertBGRAToRGB(const uint32_t* src,
351 int num_pixels, uint8_t* dst) {
352 int temp0, temp1, temp2, temp3;
353 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
354 const uint32_t* const p_loop2_end = src + num_pixels;
355 __asm__ volatile (
356 ".set push \n\t"
357 ".set noreorder \n\t"
358 "beq %[src], %[p_loop1_end], 3f \n\t"
359 " nop \n\t"
360 "0: \n\t"
361 "lw %[temp3], 12(%[src]) \n\t"
362 "lw %[temp2], 8(%[src]) \n\t"
363 "lw %[temp1], 4(%[src]) \n\t"
364 "lw %[temp0], 0(%[src]) \n\t"
365 "ins %[temp3], %[temp2], 24, 8 \n\t"
366 "sll %[temp2], %[temp2], 8 \n\t"
367 "rotr %[temp3], %[temp3], 16 \n\t"
368 "ins %[temp2], %[temp1], 0, 16 \n\t"
369 "sll %[temp1], %[temp1], 8 \n\t"
370 "wsbh %[temp3], %[temp3] \n\t"
371 "balign %[temp0], %[temp1], 1 \n\t"
372 "wsbh %[temp2], %[temp2] \n\t"
373 "wsbh %[temp0], %[temp0] \n\t"
374 "usw %[temp3], 8(%[dst]) \n\t"
375 "rotr %[temp0], %[temp0], 16 \n\t"
376 "usw %[temp2], 4(%[dst]) \n\t"
377 "addiu %[src], %[src], 16 \n\t"
378 "usw %[temp0], 0(%[dst]) \n\t"
379 "bne %[src], %[p_loop1_end], 0b \n\t"
380 " addiu %[dst], %[dst], 12 \n\t"
381 "3: \n\t"
382 "beq %[src], %[p_loop2_end], 2f \n\t"
383 " nop \n\t"
384 "1: \n\t"
385 "lw %[temp0], 0(%[src]) \n\t"
386 "addiu %[src], %[src], 4 \n\t"
387 "wsbh %[temp1], %[temp0] \n\t"
388 "addiu %[dst], %[dst], 3 \n\t"
389 "ush %[temp1], -2(%[dst]) \n\t"
390 "sra %[temp0], %[temp0], 16 \n\t"
391 "bne %[src], %[p_loop2_end], 1b \n\t"
392 " sb %[temp0], -3(%[dst]) \n\t"
393 "2: \n\t"
394 ".set pop \n\t"
395 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
396 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
397 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
398 : "memory"
399 );
400 }
401
ConvertBGRAToRGBA(const uint32_t * src,int num_pixels,uint8_t * dst)402 static void ConvertBGRAToRGBA(const uint32_t* src,
403 int num_pixels, uint8_t* dst) {
404 int temp0, temp1, temp2, temp3;
405 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
406 const uint32_t* const p_loop2_end = src + num_pixels;
407 __asm__ volatile (
408 ".set push \n\t"
409 ".set noreorder \n\t"
410 "beq %[src], %[p_loop1_end], 3f \n\t"
411 " nop \n\t"
412 "0: \n\t"
413 "lw %[temp0], 0(%[src]) \n\t"
414 "lw %[temp1], 4(%[src]) \n\t"
415 "lw %[temp2], 8(%[src]) \n\t"
416 "lw %[temp3], 12(%[src]) \n\t"
417 "wsbh %[temp0], %[temp0] \n\t"
418 "wsbh %[temp1], %[temp1] \n\t"
419 "wsbh %[temp2], %[temp2] \n\t"
420 "wsbh %[temp3], %[temp3] \n\t"
421 "addiu %[src], %[src], 16 \n\t"
422 "balign %[temp0], %[temp0], 1 \n\t"
423 "balign %[temp1], %[temp1], 1 \n\t"
424 "balign %[temp2], %[temp2], 1 \n\t"
425 "balign %[temp3], %[temp3], 1 \n\t"
426 "usw %[temp0], 0(%[dst]) \n\t"
427 "usw %[temp1], 4(%[dst]) \n\t"
428 "usw %[temp2], 8(%[dst]) \n\t"
429 "usw %[temp3], 12(%[dst]) \n\t"
430 "bne %[src], %[p_loop1_end], 0b \n\t"
431 " addiu %[dst], %[dst], 16 \n\t"
432 "3: \n\t"
433 "beq %[src], %[p_loop2_end], 2f \n\t"
434 " nop \n\t"
435 "1: \n\t"
436 "lw %[temp0], 0(%[src]) \n\t"
437 "wsbh %[temp0], %[temp0] \n\t"
438 "addiu %[src], %[src], 4 \n\t"
439 "balign %[temp0], %[temp0], 1 \n\t"
440 "usw %[temp0], 0(%[dst]) \n\t"
441 "bne %[src], %[p_loop2_end], 1b \n\t"
442 " addiu %[dst], %[dst], 4 \n\t"
443 "2: \n\t"
444 ".set pop \n\t"
445 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
446 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
447 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
448 : "memory"
449 );
450 }
451
ConvertBGRAToRGBA4444(const uint32_t * src,int num_pixels,uint8_t * dst)452 static void ConvertBGRAToRGBA4444(const uint32_t* src,
453 int num_pixels, uint8_t* dst) {
454 int temp0, temp1, temp2, temp3, temp4, temp5;
455 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
456 const uint32_t* const p_loop2_end = src + num_pixels;
457 __asm__ volatile (
458 ".set push \n\t"
459 ".set noreorder \n\t"
460 "beq %[src], %[p_loop1_end], 3f \n\t"
461 " nop \n\t"
462 "0: \n\t"
463 "lw %[temp0], 0(%[src]) \n\t"
464 "lw %[temp1], 4(%[src]) \n\t"
465 "lw %[temp2], 8(%[src]) \n\t"
466 "lw %[temp3], 12(%[src]) \n\t"
467 "ext %[temp4], %[temp0], 28, 4 \n\t"
468 "ext %[temp5], %[temp0], 12, 4 \n\t"
469 "ins %[temp0], %[temp4], 0, 4 \n\t"
470 "ext %[temp4], %[temp1], 28, 4 \n\t"
471 "ins %[temp0], %[temp5], 16, 4 \n\t"
472 "ext %[temp5], %[temp1], 12, 4 \n\t"
473 "ins %[temp1], %[temp4], 0, 4 \n\t"
474 "ext %[temp4], %[temp2], 28, 4 \n\t"
475 "ins %[temp1], %[temp5], 16, 4 \n\t"
476 "ext %[temp5], %[temp2], 12, 4 \n\t"
477 "ins %[temp2], %[temp4], 0, 4 \n\t"
478 "ext %[temp4], %[temp3], 28, 4 \n\t"
479 "ins %[temp2], %[temp5], 16, 4 \n\t"
480 "ext %[temp5], %[temp3], 12, 4 \n\t"
481 "ins %[temp3], %[temp4], 0, 4 \n\t"
482 "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
483 "ins %[temp3], %[temp5], 16, 4 \n\t"
484 "addiu %[src], %[src], 16 \n\t"
485 "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
486 #ifdef WEBP_SWAP_16BIT_CSP
487 "usw %[temp1], 0(%[dst]) \n\t"
488 "usw %[temp3], 4(%[dst]) \n\t"
489 #else
490 "wsbh %[temp1], %[temp1] \n\t"
491 "wsbh %[temp3], %[temp3] \n\t"
492 "usw %[temp1], 0(%[dst]) \n\t"
493 "usw %[temp3], 4(%[dst]) \n\t"
494 #endif
495 "bne %[src], %[p_loop1_end], 0b \n\t"
496 " addiu %[dst], %[dst], 8 \n\t"
497 "3: \n\t"
498 "beq %[src], %[p_loop2_end], 2f \n\t"
499 " nop \n\t"
500 "1: \n\t"
501 "lw %[temp0], 0(%[src]) \n\t"
502 "ext %[temp4], %[temp0], 28, 4 \n\t"
503 "ext %[temp5], %[temp0], 12, 4 \n\t"
504 "ins %[temp0], %[temp4], 0, 4 \n\t"
505 "ins %[temp0], %[temp5], 16, 4 \n\t"
506 "addiu %[src], %[src], 4 \n\t"
507 "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
508 #ifdef WEBP_SWAP_16BIT_CSP
509 "ush %[temp0], 0(%[dst]) \n\t"
510 #else
511 "wsbh %[temp0], %[temp0] \n\t"
512 "ush %[temp0], 0(%[dst]) \n\t"
513 #endif
514 "bne %[src], %[p_loop2_end], 1b \n\t"
515 " addiu %[dst], %[dst], 2 \n\t"
516 "2: \n\t"
517 ".set pop \n\t"
518 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
519 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
520 [dst]"+&r"(dst), [src]"+&r"(src)
521 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
522 : "memory"
523 );
524 }
525
ConvertBGRAToRGB565(const uint32_t * src,int num_pixels,uint8_t * dst)526 static void ConvertBGRAToRGB565(const uint32_t* src,
527 int num_pixels, uint8_t* dst) {
528 int temp0, temp1, temp2, temp3, temp4, temp5;
529 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
530 const uint32_t* const p_loop2_end = src + num_pixels;
531 __asm__ volatile (
532 ".set push \n\t"
533 ".set noreorder \n\t"
534 "beq %[src], %[p_loop1_end], 3f \n\t"
535 " nop \n\t"
536 "0: \n\t"
537 "lw %[temp0], 0(%[src]) \n\t"
538 "lw %[temp1], 4(%[src]) \n\t"
539 "lw %[temp2], 8(%[src]) \n\t"
540 "lw %[temp3], 12(%[src]) \n\t"
541 "ext %[temp4], %[temp0], 8, 16 \n\t"
542 "ext %[temp5], %[temp0], 5, 11 \n\t"
543 "ext %[temp0], %[temp0], 3, 5 \n\t"
544 "ins %[temp4], %[temp5], 0, 11 \n\t"
545 "ext %[temp5], %[temp1], 5, 11 \n\t"
546 "ins %[temp4], %[temp0], 0, 5 \n\t"
547 "ext %[temp0], %[temp1], 8, 16 \n\t"
548 "ext %[temp1], %[temp1], 3, 5 \n\t"
549 "ins %[temp0], %[temp5], 0, 11 \n\t"
550 "ext %[temp5], %[temp2], 5, 11 \n\t"
551 "ins %[temp0], %[temp1], 0, 5 \n\t"
552 "ext %[temp1], %[temp2], 8, 16 \n\t"
553 "ext %[temp2], %[temp2], 3, 5 \n\t"
554 "ins %[temp1], %[temp5], 0, 11 \n\t"
555 "ext %[temp5], %[temp3], 5, 11 \n\t"
556 "ins %[temp1], %[temp2], 0, 5 \n\t"
557 "ext %[temp2], %[temp3], 8, 16 \n\t"
558 "ext %[temp3], %[temp3], 3, 5 \n\t"
559 "ins %[temp2], %[temp5], 0, 11 \n\t"
560 "append %[temp0], %[temp4], 16 \n\t"
561 "ins %[temp2], %[temp3], 0, 5 \n\t"
562 "addiu %[src], %[src], 16 \n\t"
563 "append %[temp2], %[temp1], 16 \n\t"
564 #ifdef WEBP_SWAP_16BIT_CSP
565 "usw %[temp0], 0(%[dst]) \n\t"
566 "usw %[temp2], 4(%[dst]) \n\t"
567 #else
568 "wsbh %[temp0], %[temp0] \n\t"
569 "wsbh %[temp2], %[temp2] \n\t"
570 "usw %[temp0], 0(%[dst]) \n\t"
571 "usw %[temp2], 4(%[dst]) \n\t"
572 #endif
573 "bne %[src], %[p_loop1_end], 0b \n\t"
574 " addiu %[dst], %[dst], 8 \n\t"
575 "3: \n\t"
576 "beq %[src], %[p_loop2_end], 2f \n\t"
577 " nop \n\t"
578 "1: \n\t"
579 "lw %[temp0], 0(%[src]) \n\t"
580 "ext %[temp4], %[temp0], 8, 16 \n\t"
581 "ext %[temp5], %[temp0], 5, 11 \n\t"
582 "ext %[temp0], %[temp0], 3, 5 \n\t"
583 "ins %[temp4], %[temp5], 0, 11 \n\t"
584 "addiu %[src], %[src], 4 \n\t"
585 "ins %[temp4], %[temp0], 0, 5 \n\t"
586 #ifdef WEBP_SWAP_16BIT_CSP
587 "ush %[temp4], 0(%[dst]) \n\t"
588 #else
589 "wsbh %[temp4], %[temp4] \n\t"
590 "ush %[temp4], 0(%[dst]) \n\t"
591 #endif
592 "bne %[src], %[p_loop2_end], 1b \n\t"
593 " addiu %[dst], %[dst], 2 \n\t"
594 "2: \n\t"
595 ".set pop \n\t"
596 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
597 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
598 [dst]"+&r"(dst), [src]"+&r"(src)
599 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
600 : "memory"
601 );
602 }
603
ConvertBGRAToBGR(const uint32_t * src,int num_pixels,uint8_t * dst)604 static void ConvertBGRAToBGR(const uint32_t* src,
605 int num_pixels, uint8_t* dst) {
606 int temp0, temp1, temp2, temp3;
607 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
608 const uint32_t* const p_loop2_end = src + num_pixels;
609 __asm__ volatile (
610 ".set push \n\t"
611 ".set noreorder \n\t"
612 "beq %[src], %[p_loop1_end], 3f \n\t"
613 " nop \n\t"
614 "0: \n\t"
615 "lw %[temp0], 0(%[src]) \n\t"
616 "lw %[temp1], 4(%[src]) \n\t"
617 "lw %[temp2], 8(%[src]) \n\t"
618 "lw %[temp3], 12(%[src]) \n\t"
619 "ins %[temp0], %[temp1], 24, 8 \n\t"
620 "sra %[temp1], %[temp1], 8 \n\t"
621 "ins %[temp1], %[temp2], 16, 16 \n\t"
622 "sll %[temp2], %[temp2], 8 \n\t"
623 "balign %[temp3], %[temp2], 1 \n\t"
624 "addiu %[src], %[src], 16 \n\t"
625 "usw %[temp0], 0(%[dst]) \n\t"
626 "usw %[temp1], 4(%[dst]) \n\t"
627 "usw %[temp3], 8(%[dst]) \n\t"
628 "bne %[src], %[p_loop1_end], 0b \n\t"
629 " addiu %[dst], %[dst], 12 \n\t"
630 "3: \n\t"
631 "beq %[src], %[p_loop2_end], 2f \n\t"
632 " nop \n\t"
633 "1: \n\t"
634 "lw %[temp0], 0(%[src]) \n\t"
635 "addiu %[src], %[src], 4 \n\t"
636 "addiu %[dst], %[dst], 3 \n\t"
637 "ush %[temp0], -3(%[dst]) \n\t"
638 "sra %[temp0], %[temp0], 16 \n\t"
639 "bne %[src], %[p_loop2_end], 1b \n\t"
640 " sb %[temp0], -1(%[dst]) \n\t"
641 "2: \n\t"
642 ".set pop \n\t"
643 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
644 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
645 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
646 : "memory"
647 );
648 }
649
650 //------------------------------------------------------------------------------
651 // Entry point
652
653 extern void VP8LDspInitMIPSdspR2(void);
654
VP8LDspInitMIPSdspR2(void)655 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
656 VP8LMapColor32b = MapARGB;
657 VP8LMapColor8b = MapAlpha;
658 VP8LPredictors[5] = Predictor5;
659 VP8LPredictors[6] = Predictor6;
660 VP8LPredictors[7] = Predictor7;
661 VP8LPredictors[8] = Predictor8;
662 VP8LPredictors[9] = Predictor9;
663 VP8LPredictors[10] = Predictor10;
664 VP8LPredictors[11] = Predictor11;
665 VP8LPredictors[12] = Predictor12;
666 VP8LPredictors[13] = Predictor13;
667 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
668 VP8LTransformColorInverse = TransformColorInverse;
669 VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
670 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
671 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
672 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
673 VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
674 }
675
676 #else // !WEBP_USE_MIPS_DSP_R2
677
// Built without WEBP_USE_MIPS_DSP_R2: the init entry point is provided by the
// WEBP_DSP_INIT_STUB macro instead of the implementation above.
// NOTE(review): presumably this expands to an empty VP8LDspInitMIPSdspR2();
// confirm against the macro's definition.
WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
679
680 #endif // WEBP_USE_MIPS_DSP_R2
681